Track bits corrected per subpage.
Note for future kernel updates: upstream Linux solves this problem
differently, by reading each subpage individually and tracking
errors per subpage. This patch should therefore not need to be
ported to a later kernel version.
Change-Id: If7f1be1162ea5e9b0b04faf9855a5b464e5bdf62
diff --git a/drivers/mtd/nand/comcerto_nand.c b/drivers/mtd/nand/comcerto_nand.c
index 4904789..d5ca078 100644
--- a/drivers/mtd/nand/comcerto_nand.c
+++ b/drivers/mtd/nand/comcerto_nand.c
@@ -422,6 +422,7 @@
uint8_t *oob = nand_device->oob_poi;
for (; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
+
chip->ecc.hwctl(mtd, NAND_ECC_READ);
chip->read_buf(mtd, p, eccsize);
chip->read_buf(mtd, ecc_code, ecc_bytes);
@@ -429,8 +430,15 @@
stat = chip->ecc.correct(mtd, p, oob, NULL);
if (stat < 0)
mtd->ecc_stats.failed++;
- else
+ else {
+ int idx = eccsteps;
+ if (idx >= MTD_ECC_STAT_SUBPAGES) {
+ idx = MTD_ECC_STAT_SUBPAGES - 1;
+ }
+
mtd->ecc_stats.corrected += stat;
+ mtd->ecc_subpage_stats.subpage_corrected[idx] += stat;
+ }
comcerto_ecc_shift(ECC_SHIFT_DISABLE);
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 137e578..1b3d432 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1422,46 +1422,73 @@
}
/*
- * NOTE(apenwarr): Newer kernels do this much better.
- * Among other things, they report a max_flips value that's the largest
- * number of flips in any 1024-byte ECC calculation, as opposed to the total
- * flips in the whole 4096-byte page. The latter is dangerous because
- * you could see 24 flips in a single 1024-byte region, which is the edge
- * of disaster, even though it's only 1/4 of the maximum 96 flips we could
- * handle if averaged across 4 pages. So where we'd like to set a threshold
- * per 1024-byte region, we instead have to set a threshold per
- * 4096-byte region that *still* must be well under 24.
+ * NOTE(dgentry): Newer kernels do this in a different, and much better, way.
+ * The upstream mtd APIs to NAND drivers know about subpages and allow errors
+ * to be reported on a per-subpage level.
+ *
+ * Here, we judge errors in two ways:
+ * 1. If the underlying NAND driver reported errors per subpage
+ * via mtd_ecc_subpage_stats, we check that the number of corrected
+ * bits is within a safe distance from the maximum number of bits
+ * we can correct. At the time of this writing only comcerto_nand.c
+ * reports per-subpage errors.
+ * 2. We check the number of bits corrected on the entire page. For
+ * example, we might allow up to 72 bits to be corrected on a
+ * 4096-byte page. This is dangerous because there is a big difference
+ * between having 18 bits corrected on each 1024-byte subpage and
+ * having all 72 bits corrected on a single subpage.
+ * Nonetheless if the NAND driver only reports stats using struct
+ * mtd_ecc_stats, this is the best we can do.
*
* Anyway, this code can go away someday when we use a newer kernel.
*/
static int unclean_if_too_many_flips(struct mtd_info *mtd,
- struct mtd_ecc_stats *stats) {
+ struct mtd_ecc_stats *stats,
+ struct mtd_ecc_subpage_stats *subpage_stats) {
uint32_t flips = mtd->ecc_stats.corrected - stats->corrected;
- uint32_t threshold;
+ uint32_t threshold, subpage_threshold;
+ int i, rc = 0;
+
switch (mtd->oobsize) {
case 8:
case 16:
case 64:
threshold = 0;
+ subpage_threshold = 0;
break;
case 128:
threshold = 4;
+ subpage_threshold = 2;
break;
case 224:
threshold = 72;
+ subpage_threshold = 18;
break;
default:
threshold = 0;
+ subpage_threshold = 0;
break;
}
if (flips > threshold / 2) {
- // This should be very rare, bu we want to know as we
+ // This should be very rare, but we want to know as we
// approach our threshold, which should be even more rare.
printk_ratelimited(KERN_WARNING
"ECC: corrected %d bits (threshold=%d)\n",
flips, threshold);
}
- return flips > threshold ? -EUCLEAN : 0;
+ if (flips > threshold) rc = -EUCLEAN;
+ for (i = 0; i < MTD_ECC_STAT_SUBPAGES; i++) {
+ flips = mtd->ecc_subpage_stats.subpage_corrected[i] -
+ subpage_stats->subpage_corrected[i];
+ if (flips > subpage_threshold / 2) {
+ printk_ratelimited(KERN_WARNING
+ "ECC: corrected %d bits in one subpage "
+ "(threshold=%d)\n", flips, subpage_threshold);
+ }
+ if (flips > subpage_threshold) rc = -EUCLEAN;
+ }
+
+ return rc;
}
/**
@@ -1478,6 +1505,7 @@
int chipnr, page, realpage, col, bytes, aligned;
struct nand_chip *chip = mtd->priv;
struct mtd_ecc_stats stats;
+ struct mtd_ecc_subpage_stats subpage_stats;
int blkcheck = (1 << (chip->phys_erase_shift - chip->page_shift)) - 1;
int sndcmd = 1;
int ret = 0;
@@ -1489,6 +1517,7 @@
uint8_t *bufpoi, *oob, *buf;
stats = mtd->ecc_stats;
+ subpage_stats = mtd->ecc_subpage_stats;
chipnr = (int)(from >> chip->chip_shift);
chip->select_chip(mtd, chipnr);
@@ -1610,7 +1639,7 @@
if (mtd->ecc_stats.failed - stats.failed)
return -EBADMSG;
- return unclean_if_too_many_flips(mtd, &stats);
+ return unclean_if_too_many_flips(mtd, &stats, &subpage_stats);
}
/**
@@ -1805,6 +1834,7 @@
int page, realpage, chipnr, sndcmd = 1;
struct nand_chip *chip = mtd->priv;
struct mtd_ecc_stats stats;
+ struct mtd_ecc_subpage_stats subpage_stats;
int blkcheck = (1 << (chip->phys_erase_shift - chip->page_shift)) - 1;
int readlen = ops->ooblen;
int len;
@@ -1814,6 +1844,7 @@
__func__, (unsigned long long)from, readlen);
stats = mtd->ecc_stats;
+ subpage_stats = mtd->ecc_subpage_stats;
if (ops->mode == MTD_OPS_AUTO_OOB)
len = chip->ecc.layout->oobavail;
@@ -1892,7 +1923,7 @@
if (mtd->ecc_stats.failed - stats.failed)
return -EBADMSG;
- return unclean_if_too_many_flips(mtd, &stats);
+ return unclean_if_too_many_flips(mtd, &stats, &subpage_stats);
}
/**
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 629401a..86d8ebb 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -262,6 +262,8 @@
/* ECC status information */
struct mtd_ecc_stats ecc_stats;
+ struct mtd_ecc_subpage_stats ecc_subpage_stats;
+
/* Subpage shift (NAND) */
int subpage_sft;
diff --git a/include/mtd/mtd-abi.h b/include/mtd/mtd-abi.h
index ec5aeaa..013f032 100644
--- a/include/mtd/mtd-abi.h
+++ b/include/mtd/mtd-abi.h
@@ -251,6 +251,11 @@
__u32 bbtblocks;
};
+struct mtd_ecc_subpage_stats {
+#define MTD_ECC_STAT_SUBPAGES 8
+ __u32 subpage_corrected[MTD_ECC_STAT_SUBPAGES];
+};
+
/*
* MTD file modes - for read/write access to MTD
*