blob: 4a54d6d4f094e9b6cb2610e6c1829a601c9c42fd [file] [log] [blame]
From 20f67f157c2284057328e6391d10e329b088f8d2 Mon Sep 17 00:00:00 2001
From: Rob Landley <rob@landley.net>
Date: Thu, 30 Jun 2016 10:39:41 -0500
Subject: [PATCH] Teach wc to do -cm together. Fix testsuite so TEST_HOST
passes too.
---
tests/wc.test | 13 +++++-----
toys/posix/wc.c | 73 +++++++++++++++++++++++++++++++--------------------------
2 files changed, 46 insertions(+), 40 deletions(-)
diff --git a/tests/wc.test b/tests/wc.test
index abb237e..d227b9c 100755
--- a/tests/wc.test
+++ b/tests/wc.test
@@ -12,16 +12,14 @@ lines
EOF
testing "wc" "wc >/dev/null && echo yes" "yes\n" "" ""
-testing "empty file" "wc" "0 0 0\n" "" ""
-testing "standard input" "wc" "1 3 5\n" "" "a b\nc"
+testing "empty file" "wc" " 0 0 0\n" "" ""
+testing "standard input" "wc" " 1 3 5\n" "" "a b\nc"
testing "-c" "wc -c file1" "26 file1\n" "" ""
testing "-l" "wc -l file1" "4 file1\n" "" ""
testing "-w" "wc -w file1" "5 file1\n" "" ""
-testing "format" "wc file1" "4 5 26 file1\n" "" ""
+NOSPACE=1 testing "format" "wc file1" " 4 5 26 file1\n" "" ""
testing "multiple files" "wc input - file1" \
- "1 2 3 input\n0 2 3 -\n4 5 26 file1\n5 9 32 total\n" "a\nb" "a b"
-
-optional TOYBOX_I18N
+ " 1 2 3 input\n 0 2 3 -\n 4 5 26 file1\n 5 9 32 total\n" "a\nb" "a b"
#Tests for wc -m
if printf "%s" "$LANG" | grep -q UTF-8
@@ -33,13 +31,14 @@ do
printf "ü" >> file1
done
testing "-m" "wc -m file1" "8193 file1\n" "" ""
+testing "-m 2" 'cat "$FILES/utf8/test2.txt" | wc -m' "169\n" "" ""
printf " " > file1
for i in $(seq 1 8192)
do
printf "ü" >> file1
done
testing "-m (invalid chars)" "wc -m file1" "8193 file1\n" "" ""
-testing "-mlw" "wc -mlw input" "1 2 11 input\n" "hello, 世界!\n" ""
+NOSPACE=1 testing "-mlw" "wc -mlw input" " 1 2 11 input\n" "hello, 世界!\n" ""
else
printf "skipping tests for wc -m"
diff --git a/toys/posix/wc.c b/toys/posix/wc.c
index e7afc81..a8c3e45 100644
--- a/toys/posix/wc.c
+++ b/toys/posix/wc.c
@@ -4,7 +4,7 @@
*
* See http://opengroup.org/onlinepubs/9699919799/utilities/wc.html
-USE_WC(NEWTOY(wc, USE_TOYBOX_I18N("m")"cwl[!cm]", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LOCALE))
+USE_WC(NEWTOY(wc, "mcwl", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LOCALE))
config WC
bool "wc"
@@ -28,68 +28,75 @@ config WC
#include "toys.h"
GLOBALS(
- unsigned long totals[3];
+ unsigned long totals[4];
)
static void show_lengths(unsigned long *lengths, char *name)
{
- int i, nospace = 1;
- for (i=0; i<3; i++) {
- if (!toys.optflags || (toys.optflags&(1<<i))) {
- xprintf(" %ld"+nospace, lengths[i]);
- nospace = 0;
+ int i, space = 7, first = 1;
+
+ for (i = 0; i<4; i++) if (toys.optflags == (1<<i)) space = 0;
+ for (i = 0; i<4; i++) {
+ if (toys.optflags&(1<<i)) {
+ printf(" %*ld"+first, space, lengths[i]);
+ first = 0;
}
TT.totals[i] += lengths[i];
}
- if (*toys.optargs) xprintf(" %s", name);
+ if (*toys.optargs) printf(" %s", name);
xputc('\n');
}
static void do_wc(int fd, char *name)
{
- int i, len, clen=1, space;
- unsigned long word=0, lengths[]={0,0,0};
+ int len = 0, clen = 1, space = 0;
+ unsigned long word = 0, lengths[] = {0,0,0,0};
+ // Speed up common case: "wc -c" on a regular file is just the file's size.
if (toys.optflags == FLAG_c) {
struct stat st;
// On Linux, files in /proc often report their size as 0.
- if (!fstat(fd, &st) && S_ISREG(st.st_mode) && st.st_size > 0) {
+ if (!fstat(fd, &st) && S_ISREG(st.st_mode) && st.st_size) {
lengths[2] = st.st_size;
goto show;
}
}
for (;;) {
- len = read(fd, toybuf, sizeof(toybuf));
- if (len<0) perror_msg_raw(name);
- if (len<1) break;
- if (toys.optflags == FLAG_c) {
- lengths[2] += len;
- continue;
- }
- for (i=0; i<len; i+=clen) {
- wchar_t wchar;
-
- if (CFG_TOYBOX_I18N && (toys.optflags&FLAG_m)) {
- clen = mbrtowc(&wchar, toybuf+i, len-i, 0);
- if (clen == -1) {
- clen = 1;
- continue;
+ int pos, done = 0, len2 = read(fd, toybuf+len, sizeof(toybuf)-len);
+
+ if (len2<0) perror_msg_raw(name);
+ else len += len2;
+ if (len2<1) done++;
+
+ for (pos = 0; pos<len; pos++) {
+ if (toybuf[pos]=='\n') lengths[0]++;
+ lengths[2]++;
+ if (toys.optflags&FLAG_m) {
+ // If every byte of the current wide char has been consumed, decode the next
+ if (--clen<1) {
+ wchar_t wchar;
+
+ // size of next wide char: don't count invalid ones, read more data if truncated
+ clen = mbrtowc(&wchar, toybuf+pos, len-pos, 0);
+ if (clen == -1) continue;
+ if (clen == -2 && !done) break;
+
+ lengths[3]++;
+ space = iswspace(wchar);
}
- if (clen == -2) break;
- if (clen == 0) clen=1;
- space = iswspace(wchar);
- } else space = isspace(toybuf[i]);
+ } else space = isspace(toybuf[pos]);
- if (toybuf[i]==10) lengths[0]++;
if (space) word=0;
else {
if (!word) lengths[1]++;
word=1;
}
- lengths[2]++;
}
+ if (done) break;
+ if (pos != len) memmove(toybuf, toybuf+pos, len-pos);
+ len -= pos;
}
show:
@@ -98,7 +105,7 @@ show:
void wc_main(void)
{
- toys.optflags |= (toys.optflags&8)>>1;
+ if (!toys.optflags) toys.optflags = FLAG_l|FLAG_w|FLAG_c;
loopfiles(toys.optargs, do_wc);
if (toys.optc>1) show_lengths(TT.totals, "total");
}
--
2.8.0.rc3.226.g39d4020