textutil: Simplify and speed up RuneChunkDecoder, add linewrap.

[NOTE: nobody actually uses the RuneChunkDecoder API; it's only
used in the LineWriter in the same package]

The old RuneChunkDecoder API was unnecessarily complicated and
slow.  In general returning an interface from an interface method
is bad for operations that are called many times, since the
returned interface is always represented by a pointer (in Go
1.4+), which commonly means that an allocation must be performed.

Added some benchmarks to show the difference.  In this case we
don't really care about the performance, but it's nice to get a
simpler API as well.

Also added a simple tool "linewrap", which formats text from
stdin into pretty output on stdout, using LineWriter.

ns/op                                    old     new   delta
UTF8LineWriter_Sizes_0_Width_0        948244  933565  -1.55%
UTF8LineWriter_Sizes_0_Width_10      1002200  980595  -2.16%
UTF8LineWriter_Sizes_0_Width_Inf      882431  861149  -2.41%
UTF8LineWriter_Sizes_1_Width_0       2327690 1341671 -42.36%
UTF8LineWriter_Sizes_1_Width_10      2369511 1397329 -41.03%
UTF8LineWriter_Sizes_1_Width_Inf     2248815 1275818 -43.27%
UTF8LineWriter_Sizes_1_2_3_Width_0   1717722 1206084 -29.79%
UTF8LineWriter_Sizes_1_2_3_Width_10  1765252 1251204 -29.12%
UTF8LineWriter_Sizes_1_2_3_Width_Inf 1641235 1123916 -31.52%
UTF8ChunkDecoder_Sizes_0              219613  189368 -13.77%
UTF8ChunkDecoder_Sizes_1             1428130  552981 -61.28%
UTF8ChunkDecoder_Sizes_1_2_3          869106  389633 -55.17%

allocs                                 old new   delta
UTF8LineWriter_Sizes_0_Width_0          15  13 -13.33%
UTF8LineWriter_Sizes_0_Width_10         15  13 -13.33%
UTF8LineWriter_Sizes_0_Width_Inf        17  15 -11.76%
UTF8LineWriter_Sizes_1_Width_0       13314  13 -99.90%
UTF8LineWriter_Sizes_1_Width_10      13314  13 -99.90%
UTF8LineWriter_Sizes_1_Width_Inf     13316  15 -99.89%
UTF8LineWriter_Sizes_1_2_3_Width_0    6665  13 -99.80%
UTF8LineWriter_Sizes_1_2_3_Width_10   6665  13 -99.80%
UTF8LineWriter_Sizes_1_2_3_Width_Inf  6667  15 -99.78%
UTF8ChunkDecoder_Sizes_0                23  21  -8.70%
UTF8ChunkDecoder_Sizes_1             13322  21 -99.84%
UTF8ChunkDecoder_Sizes_1_2_3          6673  21 -99.69%

Change-Id: Ifd649cb884a23397ec54332827aff9b8f457d1d2
diff --git a/cmd/linewrap/doc.go b/cmd/linewrap/doc.go
new file mode 100644
index 0000000..b61e8e7
--- /dev/null
+++ b/cmd/linewrap/doc.go
@@ -0,0 +1,64 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated via go generate.
+// DO NOT UPDATE MANUALLY
+
+/*
+Command linewrap formats text from stdin into pretty output on stdout.
+
+The input text is expected to consist of words, defined as sequences of letters.
+Sequences of words form paragraphs, where paragraphs are separated by either
+blank lines (that contain no letters), or an explicit U+2029 ParagraphSeparator.
+Input lines with leading spaces are treated verbatim.
+
+Paragraphs are output as word-wrapped lines; line breaks only occur at word
+boundaries.  Output lines are usually no longer than the target width,
+defaulting to the terminal width.  The exceptions are single words longer than
+the target width, which are output on their own line, and verbatim lines, which
+may be arbitrarily longer or shorter than the width.
+
+Output lines never contain trailing spaces.  Only verbatim output lines may
+contain leading spaces.  Spaces separating input words are output verbatim,
+unless it would result in a line with leading or trailing spaces.
+
+Example usage in a unix terminal:
+  $ cat myfile.txt | linewrap
+
+See http://godoc.org/v.io/x/lib/textutil#LineWriter for details on the
+formatting algorithm.
+
+Usage:
+   linewrap [flags]
+
+The linewrap flags are:
+ -indents=
+   Comma-separated indentation prefixes.  Each entry specifies the prefix to use
+   for the corresponding paragraph line, or all subsequent paragraph lines if
+   there are no more entries.  E.g. "AA,BBB,C" means the first line in each
+   paragraph is indented with "AA", the second line with "BBB", and all
+   subsequent lines with "C".  The format of each indent prefix is a Go
+   interpreted string literal.
+      https://golang.org/ref/spec#String_literals
+ -line-term=\n
+   Line terminator.  Every output line is terminated with this string.  The
+   format is a Go interpreted string literal, where \n means newline.
+      https://golang.org/ref/spec#String_literals
+ -para-sep=\n
+   Paragraph separator.  Every consecutive pair of non-empty paragraphs is
+   separated with this string.  The format is a Go interpreted string literal,
+   where \n means newline.
+      https://golang.org/ref/spec#String_literals
+ -width=<terminal width>
+   Target line width in runes.  If negative the line width is unlimited; each
+   paragraph is output as a single line.  If 0 each word is output on its own
+   line. Defaults to the terminal width.
+
+The global flags are:
+ -metadata=<just specify -metadata to activate>
+   Displays metadata for the program and exits.
+ -time=false
+   Dump timing information to stderr before exiting the program.
+*/
+package main
diff --git a/cmd/linewrap/linewrap.go b/cmd/linewrap/linewrap.go
new file mode 100644
index 0000000..03db4ab
--- /dev/null
+++ b/cmd/linewrap/linewrap.go
@@ -0,0 +1,126 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The following enables go generate to generate the doc.go file.
+//go:generate go run $JIRI_ROOT/release/go/src/v.io/x/lib/cmdline/testdata/gendoc.go . -h
+
+package main
+
+import (
+	"io"
+	"strconv"
+	"strings"
+
+	"v.io/x/lib/cmdline"
+	"v.io/x/lib/textutil"
+)
+
+func main() {
+	cmdline.Main(cmdLineWrap)
+}
+
+var cmdLineWrap = &cmdline.Command{
+	Runner: cmdline.RunnerFunc(runLineWrap),
+	Name:   "linewrap",
+	Short:  "formats text into pretty output",
+	Long: `
+Command linewrap formats text from stdin into pretty output on stdout.
+
+The input text is expected to consist of words, defined as sequences of letters.
+Sequences of words form paragraphs, where paragraphs are separated by either
+blank lines (that contain no letters), or an explicit U+2029 ParagraphSeparator.
+Input lines with leading spaces are treated verbatim.
+
+Paragraphs are output as word-wrapped lines; line breaks only occur at word
+boundaries.  Output lines are usually no longer than the target width,
+defaulting to the terminal width.  The exceptions are single words longer than
+the target width, which are output on their own line, and verbatim lines, which
+may be arbitrarily longer or shorter than the width.
+
+Output lines never contain trailing spaces.  Only verbatim output lines may
+contain leading spaces.  Spaces separating input words are output verbatim,
+unless it would result in a line with leading or trailing spaces.
+
+Example usage in a unix terminal:
+  $ cat myfile.txt | linewrap
+
+See http://godoc.org/v.io/x/lib/textutil#LineWriter for details on the
+formatting algorithm.
+`,
+}
+
+var (
+	flagWidth    int
+	flagLineTerm string
+	flagParaSep  string
+	flagIndents  string
+)
+
+func init() {
+	_, width, err := textutil.TerminalSize()
+	if err != nil {
+		width = 80 // reasonable default for unknown terminal width
+	}
+	cmdLineWrap.Flags.IntVar(&flagWidth, "width", width, `
+Target line width in runes.  If negative the line width is unlimited; each
+paragraph is output as a single line.  If 0 each word is output on its own line.
+Defaults to the terminal width.
+`)
+	cmdLineWrap.Flags.StringVar(&flagLineTerm, "line-term", `\n`, `
+Line terminator.  Every output line is terminated with this string.  The format
+is a Go interpreted string literal, where \n means newline.
+   https://golang.org/ref/spec#String_literals
+`)
+	cmdLineWrap.Flags.StringVar(&flagParaSep, "para-sep", `\n`, `
+Paragraph separator.  Every consecutive pair of non-empty paragraphs is
+separated with this string.  The format is a Go interpreted string literal,
+where \n means newline.
+   https://golang.org/ref/spec#String_literals
+`)
+	cmdLineWrap.Flags.StringVar(&flagIndents, "indents", ``, `
+Comma-separated indentation prefixes.  Each entry specifies the prefix to use for
+the corresponding paragraph line, or all subsequent paragraph lines if there are
+no more entries.  E.g. "AA,BBB,C" means the first line in each paragraph is
+indented with "AA", the second line with "BBB", and all subsequent lines with
+"C".  The format of each indent prefix is a Go interpreted string literal.
+   https://golang.org/ref/spec#String_literals
+`)
+	// Override default value, so that the godoc style shows good defaults.
+	cmdLineWrap.Flags.Lookup("width").DefValue = "<terminal width>"
+}
+
+func runLineWrap(env *cmdline.Env, args []string) error {
+	lineTerm, err := strconv.Unquote(`"` + flagLineTerm + `"`)
+	if err != nil {
+		return err
+	}
+	paraSep, err := strconv.Unquote(`"` + flagParaSep + `"`)
+	if err != nil {
+		return err
+	}
+	var indents []string
+	if flagIndents != "" {
+		for _, quoted := range strings.Split(flagIndents, ",") {
+			indent, err := strconv.Unquote(`"` + quoted + `"`)
+			if err != nil {
+				return err
+			}
+			indents = append(indents, indent)
+		}
+	}
+	w := textutil.NewUTF8LineWriter(env.Stdout, flagWidth)
+	if err := w.SetLineTerminator(lineTerm); err != nil {
+		return err
+	}
+	if err := w.SetParagraphSeparator(paraSep); err != nil {
+		return err
+	}
+	if err := w.SetIndents(indents...); err != nil {
+		return err
+	}
+	if _, err := io.Copy(w, env.Stdin); err != nil {
+		return err
+	}
+	return w.Flush()
+}
diff --git a/textutil/.api b/textutil/.api
index cc033a5..278023d 100644
--- a/textutil/.api
+++ b/textutil/.api
@@ -2,12 +2,12 @@
 pkg textutil, const LineSeparator ideal-char
 pkg textutil, const ParagraphSeparator ideal-char
 pkg textutil, func ByteReplaceWriter(io.Writer, byte, string) io.Writer
+pkg textutil, func FlushRuneChunk(RuneChunkDecoder, func(rune) error) error
 pkg textutil, func NewLineWriter(io.Writer, int, RuneChunkDecoder, RuneEncoder) *LineWriter
 pkg textutil, func NewUTF8LineWriter(io.Writer, int) *LineWriter
 pkg textutil, func PrefixWriter(io.Writer, string) io.Writer
-pkg textutil, func RuneChunkFlush(RuneChunkDecoder, func(rune) error) error
-pkg textutil, func RuneChunkWrite(RuneChunkDecoder, func(rune) error, []byte) (int, error)
 pkg textutil, func TerminalSize() (int, int, error)
+pkg textutil, func WriteRuneChunk(RuneChunkDecoder, func(rune) error, []byte) (int, error)
 pkg textutil, method (*LineWriter) Flush() error
 pkg textutil, method (*LineWriter) ForceVerbatim(bool) error
 pkg textutil, method (*LineWriter) SetIndents(...string) error
@@ -15,17 +15,14 @@
 pkg textutil, method (*LineWriter) SetParagraphSeparator(string) error
 pkg textutil, method (*LineWriter) Width() int
 pkg textutil, method (*LineWriter) Write([]byte) (int, error)
-pkg textutil, method (*UTF8ChunkDecoder) Decode([]byte) RuneStreamDecoder
-pkg textutil, method (*UTF8ChunkDecoder) DecodeLeftover() RuneStreamDecoder
+pkg textutil, method (*UTF8ChunkDecoder) DecodeRune([]byte) (rune, int)
+pkg textutil, method (*UTF8ChunkDecoder) FlushRune() rune
 pkg textutil, method (UTF8Encoder) Encode(rune, *bytes.Buffer)
 pkg textutil, type LineWriter struct
-pkg textutil, type RuneChunkDecoder interface { Decode, DecodeLeftover }
-pkg textutil, type RuneChunkDecoder interface, Decode([]byte) RuneStreamDecoder
-pkg textutil, type RuneChunkDecoder interface, DecodeLeftover() RuneStreamDecoder
+pkg textutil, type RuneChunkDecoder interface { DecodeRune, FlushRune }
+pkg textutil, type RuneChunkDecoder interface, DecodeRune([]byte) (rune, int)
+pkg textutil, type RuneChunkDecoder interface, FlushRune() rune
 pkg textutil, type RuneEncoder interface { Encode }
 pkg textutil, type RuneEncoder interface, Encode(rune, *bytes.Buffer)
-pkg textutil, type RuneStreamDecoder interface { BytePos, Next }
-pkg textutil, type RuneStreamDecoder interface, BytePos() int
-pkg textutil, type RuneStreamDecoder interface, Next() rune
 pkg textutil, type UTF8ChunkDecoder struct
 pkg textutil, type UTF8Encoder struct
diff --git a/textutil/line_writer.go b/textutil/line_writer.go
index 0742af8..c4dcb1f 100644
--- a/textutil/line_writer.go
+++ b/textutil/line_writer.go
@@ -201,7 +201,7 @@
 //
 // Flush must be called after the last call to Write.
 func (w *LineWriter) Write(data []byte) (int, error) {
-	return RuneChunkWrite(w.runeDecoder, w.addRune, data)
+	return WriteRuneChunk(w.runeDecoder, w.addRune, data)
 }
 
 // Flush flushes any remaining buffered text, and resets the paragraph line
@@ -212,7 +212,7 @@
 // Flush must be called after the last call to Write, and may be called an
 // arbitrary number of times before the last Write.
 func (w *LineWriter) Flush() error {
-	if err := RuneChunkFlush(w.runeDecoder, w.addRune); err != nil {
+	if err := FlushRuneChunk(w.runeDecoder, w.addRune); err != nil {
 		return err
 	}
 	// Add U+2028 to force the last line (if any) to be written.
diff --git a/textutil/line_writer_test.go b/textutil/line_writer_test.go
index 50f4184..06fdc9f 100644
--- a/textutil/line_writer_test.go
+++ b/textutil/line_writer_test.go
@@ -273,7 +273,7 @@
 	return strings.Repeat(" ", count)
 }
 
-func newUTF8LineWriter(t *testing.T, buf io.Writer, width int, lp lp, indents []int) *LineWriter {
+func newUTF8LineWriter(t testing.TB, buf io.Writer, width int, lp lp, indents []int) *LineWriter {
 	w := NewUTF8LineWriter(buf, width)
 	if lp.line != "" || lp.para != "" {
 		if err := w.SetLineTerminator(lp.line); err != nil {
@@ -295,9 +295,9 @@
 	return w
 }
 
-func lineWriterWriteFlush(t *testing.T, w *LineWriter, text string, sizes []int) {
+func lineWriterWriteFlush(t testing.TB, w *LineWriter, text string, sizes []int) {
 	// Write chunks of different sizes until we've exhausted the input.
-	remain := text
+	remain := []byte(text)
 	for ix := 0; len(remain) > 0; ix++ {
 		var chunk []byte
 		chunk, remain = nextChunk(remain, sizes, ix)
@@ -311,3 +311,41 @@
 		t.Errorf("%q Flush() got %v, want nil", text, err)
 	}
 }
+
+func benchUTF8LineWriter(b *testing.B, width int, sizes []int) {
+	for i := 0; i < b.N; i++ {
+		var buf bytes.Buffer
+		w := newUTF8LineWriter(b, &buf, width, lp{}, nil)
+		lineWriterWriteFlush(b, w, benchText, sizes)
+	}
+}
+
+func BenchmarkUTF8LineWriter_Sizes_0_Width_0(b *testing.B) {
+	benchUTF8LineWriter(b, 0, nil)
+}
+func BenchmarkUTF8LineWriter_Sizes_0_Width_10(b *testing.B) {
+	benchUTF8LineWriter(b, 10, nil)
+}
+func BenchmarkUTF8LineWriter_Sizes_0_Width_Inf(b *testing.B) {
+	benchUTF8LineWriter(b, -1, nil)
+}
+
+func BenchmarkUTF8LineWriter_Sizes_1_Width_0(b *testing.B) {
+	benchUTF8LineWriter(b, 0, []int{1})
+}
+func BenchmarkUTF8LineWriter_Sizes_1_Width_10(b *testing.B) {
+	benchUTF8LineWriter(b, 10, []int{1})
+}
+func BenchmarkUTF8LineWriter_Sizes_1_Width_Inf(b *testing.B) {
+	benchUTF8LineWriter(b, -1, []int{1})
+}
+
+func BenchmarkUTF8LineWriter_Sizes_1_2_3_Width_0(b *testing.B) {
+	benchUTF8LineWriter(b, 0, []int{1, 2, 3})
+}
+func BenchmarkUTF8LineWriter_Sizes_1_2_3_Width_10(b *testing.B) {
+	benchUTF8LineWriter(b, 10, []int{1, 2, 3})
+}
+func BenchmarkUTF8LineWriter_Sizes_1_2_3_Width_Inf(b *testing.B) {
+	benchUTF8LineWriter(b, -1, []int{1, 2, 3})
+}
diff --git a/textutil/rune.go b/textutil/rune.go
index 7342e6c..49ed2e9 100644
--- a/textutil/rune.go
+++ b/textutil/rune.go
@@ -23,16 +23,6 @@
 	Encode(r rune, buf *bytes.Buffer)
 }
 
-// RuneStreamDecoder is the interface to a decoder of a contiguous stream of
-// runes.
-type RuneStreamDecoder interface {
-	// Next returns the next rune.  Invalid encodings are returned as U+FFFD.
-	// Returns EOF at the end of the stream.
-	Next() rune
-	// BytePos returns the current byte position in the original data buffer.
-	BytePos() int
-}
-
 // RuneChunkDecoder is the interface to a decoder of a stream of encoded runes
 // that may be arbitrarily chunked.
 //
@@ -40,45 +30,53 @@
 // wrappers, to handle buffering when chunk boundaries may occur in the middle
 // of an encoded rune.
 type RuneChunkDecoder interface {
-	// Decode returns a RuneStreamDecoder that decodes the data chunk.  Call Next
-	// repeatedly on the returned stream until it returns EOF to decode the chunk.
-	Decode(chunk []byte) RuneStreamDecoder
-	// DecodeLeftover returns a RuneStreamDecoder that decodes leftover buffered
-	// data.  Call Next repeatedly on the returned stream until it returns EOF to
-	// ensure all buffered data is processed.
-	DecodeLeftover() RuneStreamDecoder
+	// DecodeRune returns the next rune in chunk, and its width in bytes.  If
+	// chunk represents a partial rune, the chunk is buffered, and DecodeRune
+	// returns EOF and the size of the chunk.  Subsequent calls to DecodeRune
+	// will combine previously buffered data when decoding.
+	DecodeRune(chunk []byte) (r rune, n int)
+	// FlushRune returns the next buffered rune.  Returns EOF when all buffered
+	// data is returned.
+	FlushRune() rune
 }
 
-// RuneChunkWrite is a helper that calls d.Decode(data) and repeatedly calls
-// Next in a loop, calling fn for every rune that is decoded.  Returns the
-// number of bytes in data that were successfully processed.  If fn returns an
-// error, Write will return with that error, without processing any more data.
+// WriteRuneChunk is a helper that repeatedly calls d.DecodeRune(chunk) until
+// EOF, calling fn for every rune that is decoded.  Returns the number of bytes
+// in chunk that were successfully processed.  If fn returns an error,
+// WriteRuneChunk will return with that error, without processing any more data.
 //
 // This is a convenience for implementing io.Writer, given a RuneChunkDecoder.
-func RuneChunkWrite(d RuneChunkDecoder, fn func(rune) error, data []byte) (int, error) {
-	stream := d.Decode(data)
-	for r := stream.Next(); r != EOF; r = stream.Next() {
+func WriteRuneChunk(d RuneChunkDecoder, fn func(rune) error, chunk []byte) (int, error) {
+	pos := 0
+	for pos < len(chunk) {
+		r, size := d.DecodeRune(chunk[pos:])
+		pos += size
+		if r == EOF {
+			break
+		}
 		if err := fn(r); err != nil {
-			return stream.BytePos(), err
+			return pos, err
 		}
 	}
-	return stream.BytePos(), nil
+	return pos, nil
 }
 
-// RuneChunkFlush is a helper that calls d.DecodeLeftover and repeatedly calls
-// Next in a loop, calling fn for every rune that is decoded.  If fn returns an
-// error, Flush will return with that error, without processing any more data.
+// FlushRuneChunk is a helper that repeatedly calls d.FlushRune until EOF,
+// calling fn for every rune that is decoded.  If fn returns an error,
+// FlushRuneChunk returns that error without processing any more data.
 //
 // This is a convenience for implementing an additional Flush() call on an
 // implementation of io.Writer, given a RuneChunkDecoder.
-func RuneChunkFlush(d RuneChunkDecoder, fn func(rune) error) error {
-	stream := d.DecodeLeftover()
-	for r := stream.Next(); r != EOF; r = stream.Next() {
+func FlushRuneChunk(d RuneChunkDecoder, fn func(rune) error) error {
+	for {
+		r := d.FlushRune()
+		if r == EOF {
+			return nil
+		}
 		if err := fn(r); err != nil {
 			return err
 		}
 	}
-	return nil
 }
 
 // bytePos and runePos distinguish positions that are used in either domain;
diff --git a/textutil/utf8.go b/textutil/utf8.go
index f2e1a5e..8cd411c 100644
--- a/textutil/utf8.go
+++ b/textutil/utf8.go
@@ -37,58 +37,32 @@
 
 var _ RuneChunkDecoder = (*UTF8ChunkDecoder)(nil)
 
-// Decode returns a RuneStreamDecoder that decodes the data chunk.  Call Next
-// repeatedly on the returned stream until it returns EOF to decode the chunk.
-//
-// If the data is chunked in the middle of an encoded rune, the final partial
-// rune in the chunk will be buffered, and the next call to Decode will continue
-// by combining the buffered data with the next chunk.
+// DecodeRune implements the RuneChunkDecoder interface method.
 //
 // Invalid encodings are transformed into U+FFFD, one byte at a time.  See
 // unicode/utf8.DecodeRune for details.
-func (d *UTF8ChunkDecoder) Decode(chunk []byte) RuneStreamDecoder {
-	return &utf8Stream{d, chunk, 0}
-}
-
-// DecodeLeftover returns a RuneStreamDecoder that decodes leftover buffered
-// data.  Call Next repeatedly on the returned stream until it returns EOF to
-// ensure all buffered data is processed.
-//
-// Since the only data that is buffered is the final partial rune, the returned
-// RuneStreamDecoder will only contain U+FFFD or EOF.
-func (d *UTF8ChunkDecoder) DecodeLeftover() RuneStreamDecoder {
-	return &utf8LeftoverStream{d, 0}
-}
-
-// nextRune decodes the next rune, logically combining any previously buffered
-// data with the data chunk.  It returns the decoded rune and the byte size of
-// the data that was used for the decoding.
-//
-// The returned size may be > 0 even if the returned rune == EOF, if a partial
-// rune was detected and buffered.  The returned size may be 0 even if the
-// returned rune != EOF, if previously buffered data was decoded.
-func (d *UTF8ChunkDecoder) nextRune(data []byte) (rune, int) {
+func (d *UTF8ChunkDecoder) DecodeRune(chunk []byte) (rune, int) {
 	if d.partialLen > 0 {
-		return d.nextRunePartial(data)
+		return d.decodeRunePartial(chunk)
 	}
-	r, size := utf8.DecodeRune(data)
-	if r == utf8.RuneError && !utf8.FullRune(data) {
-		// Initialize the partial rune buffer with remaining data.
-		d.partialLen = copy(d.partial[:], data)
-		return d.verifyPartial(d.partialLen, data)
+	r, size := utf8.DecodeRune(chunk)
+	if r == utf8.RuneError && !utf8.FullRune(chunk) {
+		// Initialize the partial rune buffer with chunk.
+		d.partialLen = copy(d.partial[:], chunk)
+		return d.verifyPartial(d.partialLen, chunk)
 	}
 	return r, size
 }
 
-// nextRunePartial implements nextRune when there is a previously buffered
+// decodeRunePartial implements DecodeRune when there is a previously buffered
 // partial rune.
-func (d *UTF8ChunkDecoder) nextRunePartial(data []byte) (rune, int) {
-	// Append as much data as we can to the partial rune, and see if it's full.
+func (d *UTF8ChunkDecoder) decodeRunePartial(chunk []byte) (rune, int) {
+	// Append as much as we can to the partial rune, and see if it's full.
 	oldLen := d.partialLen
-	d.partialLen += copy(d.partial[oldLen:], data)
+	d.partialLen += copy(d.partial[oldLen:], chunk)
 	if !utf8.FullRune(d.partial[:d.partialLen]) {
 		// We still don't have a full rune - keep waiting.
-		return d.verifyPartial(d.partialLen-oldLen, data)
+		return d.verifyPartial(d.partialLen-oldLen, chunk)
 	}
 	// We finally have a full rune.
 	r, size := utf8.DecodeRune(d.partial[:d.partialLen])
@@ -100,15 +74,15 @@
 		// isn't a UTF-8 trailing byte.  In this case utf8.DecodeRune returns U+FFFD
 		// and size=1, to indicate we should skip the first byte.
 		//
-		// We shift the unread portion of the old partial data forward, and update
+		// We shift the unread portion of the old partial buffer forward, and update
 		// the partial len so that it's strictly decreasing.  The strictly
 		// decreasing property isn't necessary for correctness, but helps avoid
-		// repeatedly copying data into the partial buffer unecessarily.
+		// repeatedly copying into the partial buffer unnecessarily.
 		copy(d.partial[:], d.partial[size:oldLen])
 		d.partialLen = oldLen - size
 		return r, 0
 	}
-	// We've used all the old buffered data; start decoding directly from data.
+	// We've used all of the partial buffer.
 	d.partialLen = 0
 	return r, size - oldLen
 }
@@ -125,47 +99,17 @@
 	return EOF, len(data)
 }
 
-// utf8Stream implements UTF8ChunkDecoder.Decode.
-type utf8Stream struct {
-	d    *UTF8ChunkDecoder
-	data []byte
-	pos  int
-}
-
-var _ RuneStreamDecoder = (*utf8Stream)(nil)
-
-func (s *utf8Stream) Next() rune {
-	if s.pos == len(s.data) {
+// FlushRune implements the RuneChunkDecoder interface method.
+//
+// Since the only data that is buffered is the final partial rune, the return
+// value will only ever be U+FFFD or EOF.  No valid runes are ever returned by
+// this method, but multiple U+FFFD may be returned before EOF.
+func (d *UTF8ChunkDecoder) FlushRune() rune {
+	if d.partialLen == 0 {
 		return EOF
 	}
-	r, size := s.d.nextRune(s.data[s.pos:])
-	s.pos += size
+	r, size := utf8.DecodeRune(d.partial[:d.partialLen])
+	copy(d.partial[:], d.partial[size:])
+	d.partialLen -= size
 	return r
 }
-
-func (s *utf8Stream) BytePos() int {
-	return s.pos
-}
-
-// utf8LeftoverStream implements UTF8ChunkDecoder.DecodeLeftover.
-type utf8LeftoverStream struct {
-	d   *UTF8ChunkDecoder
-	pos int
-}
-
-var _ RuneStreamDecoder = (*utf8LeftoverStream)(nil)
-
-func (s *utf8LeftoverStream) Next() rune {
-	if s.d.partialLen == 0 {
-		return EOF
-	}
-	r, size := utf8.DecodeRune(s.d.partial[:s.d.partialLen])
-	copy(s.d.partial[:], s.d.partial[size:])
-	s.d.partialLen -= size
-	s.pos += size
-	return r
-}
-
-func (s *utf8LeftoverStream) BytePos() int {
-	return s.pos
-}
diff --git a/textutil/utf8_test.go b/textutil/utf8_test.go
index d22d52e..8175e62 100644
--- a/textutil/utf8_test.go
+++ b/textutil/utf8_test.go
@@ -6,6 +6,7 @@
 
 import (
 	"reflect"
+	"strings"
 	"testing"
 )
 
@@ -14,101 +15,139 @@
 	r3 := "王"
 	r4 := "\U0001F680"
 	tests := []struct {
-		Text string
-		Want []rune
+		Text  string
+		Write []rune
+		Flush []rune
 	}{
-		{"", nil},
-		{"a", []rune{'a'}},
-		{"abc", []rune{'a', 'b', 'c'}},
-		{"abc def ghi", []rune{'a', 'b', 'c', ' ', 'd', 'e', 'f', ' ', 'g', 'h', 'i'}},
+		{"", nil, nil},
+		{"a", []rune{'a'}, nil},
+		{"abc", []rune{'a', 'b', 'c'}, nil},
+		{"abc def ghi", []rune{'a', 'b', 'c', ' ', 'd', 'e', 'f', ' ', 'g', 'h', 'i'}, nil},
 		// 2-byte runes.
-		{"ΔΘΠΣΦ", []rune{'Δ', 'Θ', 'Π', 'Σ', 'Φ'}},
+		{"ΔΘΠΣΦ", []rune{'Δ', 'Θ', 'Π', 'Σ', 'Φ'}, nil},
 		// 3-byte runes.
-		{"王普澤世界", []rune{'王', '普', '澤', '世', '界'}},
+		{"王普澤世界", []rune{'王', '普', '澤', '世', '界'}, nil},
 		// 4-byte runes.
-		{"\U0001F680\U0001F681\U0001F682\U0001F683", []rune{'\U0001F680', '\U0001F681', '\U0001F682', '\U0001F683'}},
+		{"\U0001F680\U0001F681\U0001F682\U0001F683", []rune{'\U0001F680', '\U0001F681', '\U0001F682', '\U0001F683'}, nil},
 		// Mixed-bytes.
-		{"aΔ王\U0001F680普Θb", []rune{'a', 'Δ', '王', '\U0001F680', '普', 'Θ', 'b'}},
+		{"aΔ王\U0001F680普Θb", []rune{'a', 'Δ', '王', '\U0001F680', '普', 'Θ', 'b'}, nil},
 		// Error runes translated to U+FFFD.
-		{"\uFFFD", []rune{'\uFFFD'}},
-		{"a\uFFFDb", []rune{'a', '\uFFFD', 'b'}},
-		{"\xFF", []rune{'\uFFFD'}},
-		{"a\xFFb", []rune{'a', '\uFFFD', 'b'}},
+		{"\uFFFD", []rune{'\uFFFD'}, nil},
+		{"a\uFFFDb", []rune{'a', '\uFFFD', 'b'}, nil},
+		{"\x80", []rune{'\uFFFD'}, nil},
+		{"\xFF", nil, []rune{'\uFFFD'}},
+		{"a\x80b", []rune{'a', '\uFFFD', 'b'}, nil},
+		{"a\xFFb", []rune{'a', '\uFFFD', 'b'}, nil},
 		// Multi-byte full runes.
-		{r2, []rune{[]rune(r2)[0]}},
-		{r3, []rune{[]rune(r3)[0]}},
-		{r4, []rune{[]rune(r4)[0]}},
-		// Partial runes translated to U+FFFD.
-		{r2[:1], []rune{'\uFFFD'}},
-		{r3[:1], []rune{'\uFFFD'}},
-		{r3[:2], []rune{'\uFFFD', '\uFFFD'}},
-		{r4[:1], []rune{'\uFFFD'}},
-		{r4[:2], []rune{'\uFFFD', '\uFFFD'}},
-		{r4[:3], []rune{'\uFFFD', '\uFFFD', '\uFFFD'}},
-		// Leading partial runes translated to U+FFFD.
-		{r2[:1] + "b", []rune{'\uFFFD', 'b'}},
-		{r3[:1] + "b", []rune{'\uFFFD', 'b'}},
-		{r3[:2] + "b", []rune{'\uFFFD', '\uFFFD', 'b'}},
-		{r4[:1] + "b", []rune{'\uFFFD', 'b'}},
-		{r4[:2] + "b", []rune{'\uFFFD', '\uFFFD', 'b'}},
-		{r4[:3] + "b", []rune{'\uFFFD', '\uFFFD', '\uFFFD', 'b'}},
-		// Trailing partial runes translated to U+FFFD.
-		{"a" + r2[:1], []rune{'a', '\uFFFD'}},
-		{"a" + r3[:1], []rune{'a', '\uFFFD'}},
-		{"a" + r3[:2], []rune{'a', '\uFFFD', '\uFFFD'}},
-		{"a" + r4[:1], []rune{'a', '\uFFFD'}},
-		{"a" + r4[:2], []rune{'a', '\uFFFD', '\uFFFD'}},
-		{"a" + r4[:3], []rune{'a', '\uFFFD', '\uFFFD', '\uFFFD'}},
+		{r2, []rune{[]rune(r2)[0]}, nil},
+		{r3, []rune{[]rune(r3)[0]}, nil},
+		{r4, []rune{[]rune(r4)[0]}, nil},
+		// Partial runes translated to one or more U+FFFD.  Since each case is a
+		// multi-byte encoding that's missing one or more bytes, the U+FFFD runes
+		// are all returned in Flush rather than Write.
+		{r2[:1], nil, []rune{'\uFFFD'}},
+		{r3[:1], nil, []rune{'\uFFFD'}},
+		{r3[:2], nil, []rune{'\uFFFD', '\uFFFD'}},
+		{r4[:1], nil, []rune{'\uFFFD'}},
+		{r4[:2], nil, []rune{'\uFFFD', '\uFFFD'}},
+		{r4[:3], nil, []rune{'\uFFFD', '\uFFFD', '\uFFFD'}},
+		// Trailing partial runes translated to U+FFFD.  Similar to above, the
+		// U+FFFD runes are all returned in Flush rather than Write.
+		{"a" + r2[:1], []rune{'a'}, []rune{'\uFFFD'}},
+		{"a" + r3[:1], []rune{'a'}, []rune{'\uFFFD'}},
+		{"a" + r3[:2], []rune{'a'}, []rune{'\uFFFD', '\uFFFD'}},
+		{"a" + r4[:1], []rune{'a'}, []rune{'\uFFFD'}},
+		{"a" + r4[:2], []rune{'a'}, []rune{'\uFFFD', '\uFFFD'}},
+		{"a" + r4[:3], []rune{'a'}, []rune{'\uFFFD', '\uFFFD', '\uFFFD'}},
+		// Leading partial runes translated to U+FFFD.  The "b" suffix causes us to
+		// discover that the encoding is invalid during Write.
+		{r2[:1] + "b", []rune{'\uFFFD', 'b'}, nil},
+		{r3[:1] + "b", []rune{'\uFFFD', 'b'}, nil},
+		{r3[:2] + "b", []rune{'\uFFFD', '\uFFFD', 'b'}, nil},
+		{r4[:1] + "b", []rune{'\uFFFD', 'b'}, nil},
+		{r4[:2] + "b", []rune{'\uFFFD', '\uFFFD', 'b'}, nil},
+		{r4[:3] + "b", []rune{'\uFFFD', '\uFFFD', '\uFFFD', 'b'}, nil},
 		// Bracketed partial runes translated to U+FFFD.
-		{"a" + r2[:1] + "b", []rune{'a', '\uFFFD', 'b'}},
-		{"a" + r3[:1] + "b", []rune{'a', '\uFFFD', 'b'}},
-		{"a" + r3[:2] + "b", []rune{'a', '\uFFFD', '\uFFFD', 'b'}},
-		{"a" + r4[:1] + "b", []rune{'a', '\uFFFD', 'b'}},
-		{"a" + r4[:2] + "b", []rune{'a', '\uFFFD', '\uFFFD', 'b'}},
-		{"a" + r4[:3] + "b", []rune{'a', '\uFFFD', '\uFFFD', '\uFFFD', 'b'}},
+		{"a" + r2[:1] + "b", []rune{'a', '\uFFFD', 'b'}, nil},
+		{"a" + r3[:1] + "b", []rune{'a', '\uFFFD', 'b'}, nil},
+		{"a" + r3[:2] + "b", []rune{'a', '\uFFFD', '\uFFFD', 'b'}, nil},
+		{"a" + r4[:1] + "b", []rune{'a', '\uFFFD', 'b'}, nil},
+		{"a" + r4[:2] + "b", []rune{'a', '\uFFFD', '\uFFFD', 'b'}, nil},
+		{"a" + r4[:3] + "b", []rune{'a', '\uFFFD', '\uFFFD', '\uFFFD', 'b'}, nil},
 	}
 	for _, test := range tests {
 		// Run with a variety of chunk sizes.
 		for _, sizes := range [][]int{nil, {1}, {2}, {1, 2}, {2, 1}, {3}, {1, 2, 3}} {
-			got := runeChunkWriteFlush(t, test.Text, sizes)
-			if want := test.Want; !reflect.DeepEqual(got, want) {
-				t.Errorf("%q got %v, want %v", test.Text, got, want)
+			dec := new(UTF8ChunkDecoder)
+			if got, want := writeRuneChunk(t, dec, test.Text, sizes), test.Write; !reflect.DeepEqual(got, want) {
+				t.Errorf("%q write got %v, want %v", test.Text, got, want)
+			}
+			if got, want := flushRuneChunk(t, dec, test.Text), test.Flush; !reflect.DeepEqual(got, want) {
+				t.Errorf("%q flush got %v, want %v", test.Text, got, want)
 			}
 		}
 	}
 }
 
-func runeChunkWriteFlush(t *testing.T, text string, sizes []int) []rune {
-	var dec UTF8ChunkDecoder
+func writeRuneChunk(t testing.TB, dec RuneChunkDecoder, text string, sizes []int) []rune {
 	var runes []rune
 	addRune := func(r rune) error {
 		runes = append(runes, r)
 		return nil
 	}
 	// Write chunks of different sizes until we've exhausted the input text.
-	remain := text
+	remain := []byte(text)
 	for ix := 0; len(remain) > 0; ix++ {
 		var chunk []byte
 		chunk, remain = nextChunk(remain, sizes, ix)
-		got, err := RuneChunkWrite(&dec, addRune, chunk)
+		got, err := WriteRuneChunk(dec, addRune, chunk)
 		if want := len(chunk); got != want || err != nil {
-			t.Errorf("%q RuneChunkWrite(%q) got (%d,%v), want (%d,nil)", text, chunk, got, err, want)
+			t.Errorf("%q WriteRuneChunk(%q) got (%d,%v), want (%d,nil)", text, chunk, got, err, want)
 		}
 	}
-	// Flush the decoder.
-	if err := RuneChunkFlush(&dec, addRune); err != nil {
-		t.Errorf("%q RuneChunkFlush got %v, want nil", text, err)
-	}
 	return runes
 }
 
-func nextChunk(text string, sizes []int, index int) (chunk []byte, remain string) {
+func flushRuneChunk(t testing.TB, dec RuneChunkDecoder, text string) []rune {
+	var runes []rune
+	addRune := func(r rune) error {
+		runes = append(runes, r)
+		return nil
+	}
+	// Flush the decoder.
+	if err := FlushRuneChunk(dec, addRune); err != nil {
+		t.Errorf("%q FlushRuneChunk got %v, want nil", text, err)
+	}
+	return runes
+}
+
+func nextChunk(text []byte, sizes []int, index int) (chunk, remain []byte) {
 	if len(sizes) == 0 {
-		return []byte(text), ""
+		return text, nil
 	}
 	size := sizes[index%len(sizes)]
 	if size >= len(text) {
-		return []byte(text), ""
+		return text, nil
 	}
-	return []byte(text[:size]), text[size:]
+	return text[:size], text[size:]
+}
+
+// benchText contains a mix of 1, 2, 3 and 4 byte runes, and invalid encodings.
+var benchText = strings.Repeat("a bc def ghij klmno pqrstu vwxyz A BC DEF GHIJ KLMNO PQRSTU VWXYZ 0123456789 !@#$%^&*()ΔΘΠΣΦ王普澤世界\U0001F680\U0001F681\U0001F682\U0001F683\uFFFD\xFF ", 100)
+
+func benchRuneChunkDecoder(b *testing.B, dec RuneChunkDecoder, sizes []int) {
+	for i := 0; i < b.N; i++ {
+		writeRuneChunk(b, dec, benchText, sizes)
+		flushRuneChunk(b, dec, benchText)
+	}
+}
+
+func BenchmarkUTF8ChunkDecoder_Sizes_0(b *testing.B) {
+	benchRuneChunkDecoder(b, new(UTF8ChunkDecoder), nil)
+}
+func BenchmarkUTF8ChunkDecoder_Sizes_1(b *testing.B) {
+	benchRuneChunkDecoder(b, new(UTF8ChunkDecoder), []int{1})
+}
+func BenchmarkUTF8ChunkDecoder_Sizes_1_2_3(b *testing.B) {
+	benchRuneChunkDecoder(b, new(UTF8ChunkDecoder), []int{1, 2, 3})
 }