summary refs log tree commit diff
diff options
context:
space:
mode:
authorStefan Potyra <stefan.potyra@informatik.uni-erlangen.de>2009-08-11 12:46:26 +1000
committerHerbert Xu <herbert@gondor.apana.org.au>2009-08-11 12:46:26 +1000
commit55c46b7286f5d9f2d8291158203e2b61d2494420 (patch)
treef4ea6cd236d953cd196ff85d4a447bcd062a2257
parent[EXPAND] Fix quoted pattern patch breakage (diff)
downloaddash-55c46b7286f5d9f2d8291158203e2b61d2494420.tar.gz
dash-55c46b7286f5d9f2d8291158203e2b61d2494420.zip
[BUILTIN] Honor tab as IFS whitespace when splitting fields in readcmd
When I try to split fields by tabs, dash doesn't honour multiple tabs
between fields as whitespace (at least that's how I interpret [1],
please correct me if I'm wrong).

#!/bin/sh

# "1\t2\t\t3"
TESTSTRING="1	2		3"

# only "\t"
IFS="	"
echo "$TESTSTRING" | while read p1 p2 p3; do
	echo "p1=${p1}, p2=${p2}, p3=${p3}"
done

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r--ChangeLog4
-rw-r--r--src/expand.c9
-rw-r--r--src/expand.h3
-rw-r--r--src/miscbltin.c97
4 files changed, 78 insertions, 35 deletions
diff --git a/ChangeLog b/ChangeLog
index e6a1d26..247b6e8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2009-08-11  Stefan Potyra <stefan.potyra@informatik.uni-erlangen.de>
+
+	* Honor tab as IFS whitespace when splitting fields in readcmd.
+
 2009-06-27  Herbert Xu <herbert@gondor.apana.org.au>
 
 	* Fix quoted pattern patch breakage.
diff --git a/src/expand.c b/src/expand.c
index 7995d40..48c45e5 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -117,9 +117,6 @@ STATIC char *evalvar(char *, int);
 STATIC size_t strtodest(const char *, const char *, int);
 STATIC void memtodest(const char *, size_t, const char *, int);
 STATIC ssize_t varvalue(char *, int, int);
-STATIC void recordregion(int, int, int);
-STATIC void removerecordregions(int); 
-STATIC void ifsbreakup(char *, struct arglist *);
 STATIC void ifsfree(void);
 STATIC void expandmeta(struct strlist *, int);
 #ifdef HAVE_GLOB
@@ -412,7 +409,7 @@ lose:
 }
 
 
-STATIC void 
+void 
 removerecordregions(int endoff)
 {
 	if (ifslastp == NULL)
@@ -1001,7 +998,7 @@ value:
  * string for IFS characters.
  */
 
-STATIC void
+void
 recordregion(int start, int end, int nulonly)
 {
 	struct ifsregion *ifsp;
@@ -1028,7 +1025,7 @@ recordregion(int start, int end, int nulonly)
  * strings to the argument list.  The regions of the string to be
  * searched for IFS characters have been stored by recordregion.
  */
-STATIC void
+void
 ifsbreakup(char *string, struct arglist *arglist)
 {
 	struct ifsregion *ifsp;
diff --git a/src/expand.h b/src/expand.h
index 1862aea..405af0b 100644
--- a/src/expand.h
+++ b/src/expand.h
@@ -67,6 +67,9 @@ void expari(int);
 #define rmescapes(p) _rmescapes((p), 0)
 char *_rmescapes(char *, int);
 int casematch(union node *, char *);
+void recordregion(int, int, int);
+void removerecordregions(int); 
+void ifsbreakup(char *, struct arglist *);
 
 /* From arith.y */
 intmax_t arith(const char *);
diff --git a/src/miscbltin.c b/src/miscbltin.c
index 3f91bc3..cca0f6c 100644
--- a/src/miscbltin.c
+++ b/src/miscbltin.c
@@ -55,14 +55,73 @@
 #include "miscbltin.h"
 #include "mystring.h"
 #include "main.h"
+#include "expand.h"
+#include "parser.h"
 
 #undef rflag
 
 
+/** handle one line of the read command.
+ *  more fields than variables -> remainder shall be part of last variable.
+ *  less fields than variables -> remaining variables unset.
+ *
+ *  @param line complete line of input
+ *  @param ap argument (variable) list
+ *  @param len length of line including trailing '\0'
+ */
+static void
+readcmd_handle_line(char *line, char **ap, size_t len)
+{
+	struct arglist arglist;
+	struct strlist *sl;
+	char *s, *backup;
+
+	/* ifsbreakup will fiddle with stack region... */
+	s = grabstackstr(line + len);
+
+	/* need a copy, so that delimiters aren't lost
+	 * in case there are more fields than variables */
+	backup = sstrdup(line);
+
+	arglist.lastp = &arglist.list;
+	recordregion(0, len, 0);
+	
+	ifsbreakup(s, &arglist);
+	*arglist.lastp = NULL;
+	removerecordregions(0);
+
+	for (sl = arglist.list; sl; sl = sl->next) {
+		/* remaining fields present, but no variables left. */
+		if (!ap[1]) {
+			size_t offset;
+			char *remainder;
+
+			/* FIXME little bit hacky, assuming that ifsbreakup 
+			 * will not modify the length of the string */
+			offset = sl->text - s;
+			remainder = backup + offset;
+			rmescapes(remainder);
+			setvar(*ap, remainder, 0);
+
+			return;
+		}
+		
+		/* set variable to field */
+		rmescapes(sl->text);
+		setvar(*ap, sl->text, 0);
+		ap++;
+	}
+
+	/* nullify remaining arguments */
+	do {
+		setvar(*ap, nullstr, 0);
+	} while (*++ap);
+}
 
 /*
  * The read builtin.  The -e option causes backslashes to escape the
- * following character.
+ * following character. The -p option followed by an argument prompts
+ * with the argument.
  *
  * This uses unbuffered input, which may be avoidable in some cases.
  */
@@ -75,9 +134,7 @@ readcmd(int argc, char **argv)
 	char c;
 	int rflag;
 	char *prompt;
-	const char *ifs;
 	char *p;
-	int startword;
 	int status;
 	int i;
 
@@ -97,10 +154,7 @@ readcmd(int argc, char **argv)
 	}
 	if (*(ap = argptr) == NULL)
 		sh_error("arg count");
-	if ((ifs = bltinlookup("IFS")) == NULL)
-		ifs = defifs;
 	status = 0;
-	startword = 1;
 	backslash = 0;
 	STARTSTACKSTR(p);
 	for (;;) {
@@ -111,10 +165,10 @@ readcmd(int argc, char **argv)
 		if (c == '\0')
 			continue;
 		if (backslash) {
-			backslash = 0;
-			if (c != '\n')
-				goto put;
-			continue;
+			if (c == '\n')
+				goto resetbs;
+			STPUTC(CTLESC, p);
+			goto put;
 		}
 		if (!rflag && c == '\\') {
 			backslash++;
@@ -122,28 +176,13 @@ readcmd(int argc, char **argv)
 		}
 		if (c == '\n')
 			break;
-		if (startword && *ifs == ' ' && strchr(ifs, c)) {
-			continue;
-		}
-		startword = 0;
-		if (ap[1] != NULL && strchr(ifs, c) != NULL) {
-			STACKSTRNUL(p);
-			setvar(*ap, stackblock(), 0);
-			ap++;
-			startword = 1;
-			STARTSTACKSTR(p);
-		} else {
 put:
-			STPUTC(c, p);
-		}
+		STPUTC(c, p);
+resetbs:
+		backslash = 0;
 	}
 	STACKSTRNUL(p);
-	/* Remove trailing blanks */
-	while ((char *)stackblock() <= --p && strchr(ifs, *p) != NULL)
-		*p = '\0';
-	setvar(*ap, stackblock(), 0);
-	while (*++ap != NULL)
-		setvar(*ap, nullstr, 0);
+	readcmd_handle_line(stackblock(), ap, p - (char *)stackblock());
 	return status;
 }