textutil/rune.go - release.go.x.lib - Git at Google

 // Copyright 2015 The Vanadium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package textutil

 import (
 	"bytes"
 )

 // TODO(toddw): Add UTF16 support.

 // Special rune values defined by this package.
 //
 // EOF is rune(-1), which lies outside the valid Unicode code point range, so
 // it does not collide with any valid rune.  RuneStreamDecoder.Next returns it
 // to signal the end of a stream.
 const (
 	EOF                = rune(-1) // Indicates the end of a rune stream.
 	LineSeparator      = '\u2028' // Unicode line separator rune.
 	ParagraphSeparator = '\u2029' // Unicode paragraph separator rune.
 )

 // RuneEncoder is the interface to an encoder of a stream of runes into
 // bytes.Buffer.  byteRuneBuffer calls Encode once for every rune it writes.
 type RuneEncoder interface {
 	// Encode encodes r into buf.  The method has no error result, so an
 	// implementation cannot report an encoding failure through this interface.
 	Encode(r rune, buf *bytes.Buffer)
 }

 // RuneStreamDecoder is the interface to a decoder of a contiguous stream of
 // runes.  Callers such as RuneChunkWrite and RuneChunkFlush drain a stream by
 // calling Next until it returns EOF, or until their callback fails.
 type RuneStreamDecoder interface {
 	// Next returns the next rune.  Invalid encodings are returned as U+FFFD.
 	// Returns EOF at the end of the stream.
 	Next() rune
 	// BytePos returns the current byte position in the original data buffer.
 	// RuneChunkWrite returns this value as its processed-byte count.
 	BytePos() int
 }

 // RuneChunkDecoder is the interface to a decoder of a stream of encoded runes
 // that may be arbitrarily chunked.
 //
 // Implementations of RuneChunkDecoder are commonly used to implement io.Writer
 // wrappers, to handle buffering when chunk boundaries may occur in the middle
 // of an encoded rune.
 type RuneChunkDecoder interface {
 	// Decode returns a RuneStreamDecoder that decodes the data chunk.  Call Next
 	// repeatedly on the returned stream until it returns EOF to decode the chunk.
 	// RuneChunkWrite is a helper that runs this loop.
 	Decode(chunk []byte) RuneStreamDecoder
 	// DecodeLeftover returns a RuneStreamDecoder that decodes leftover buffered
 	// data.  Call Next repeatedly on the returned stream until it returns EOF to
 	// ensure all buffered data is processed.  RuneChunkFlush is a helper that
 	// runs this loop.
 	DecodeLeftover() RuneStreamDecoder
 }

 // RuneChunkWrite feeds data to d.Decode and invokes fn once, in order, for
 // every rune the returned stream yields before EOF.  The int result is the
 // stream's BytePos at the moment RuneChunkWrite returns, i.e. how far into
 // data the decoder had advanced.  If fn returns an error, RuneChunkWrite stops
 // immediately and returns that error together with the byte position reached;
 // no further runes are decoded.
 //
 // This is a convenience for implementing io.Writer, given a RuneChunkDecoder.
 func RuneChunkWrite(d RuneChunkDecoder, fn func(rune) error, data []byte) (int, error) {
 	runes := d.Decode(data)
 	for {
 		r := runes.Next()
 		if r == EOF {
 			// The chunk is fully drained.
 			return runes.BytePos(), nil
 		}
 		if err := fn(r); err != nil {
 			return runes.BytePos(), err
 		}
 	}
 }

 // RuneChunkFlush drains the stream returned by d.DecodeLeftover, invoking fn
 // once, in order, for every rune it yields before EOF.  If fn returns an
 // error, RuneChunkFlush stops immediately and returns that error; otherwise it
 // returns nil once the stream reports EOF.
 //
 // This is a convenience for implementing an additional Flush() call on an
 // implementation of io.Writer, given a RuneChunkDecoder.
 func RuneChunkFlush(d RuneChunkDecoder, fn func(rune) error) error {
 	leftover := d.DecodeLeftover()
 	for {
 		r := leftover.Next()
 		if r == EOF {
 			// All buffered data has been handed to fn.
 			return nil
 		}
 		if err := fn(r); err != nil {
 			return err
 		}
 	}
 }

 // bytePos and runePos distinguish positions that are used in either domain;
 // we're trying to avoid silly mistakes like adding a bytePos to a runePos.
 // Because they are distinct named types, mixing them in one expression does
 // not compile without an explicit conversion.
 //
 // bytePos counts bytes (see byteRuneBuffer.ByteLen); runePos counts runes (see
 // byteRuneBuffer.RuneLen).
 type bytePos int
 type runePos int

 // byteRuneBuffer maintains a buffer with both byte and rune based positions.
 // The byte length is read from buf itself, while the rune length is a separate
 // counter advanced by WriteRune (and by WriteString, which calls it).
 // WriteString0Runes adds bytes without advancing the counter.
 type byteRuneBuffer struct {
 	// enc encodes each written rune into buf.  The write methods call it
 	// without a nil check, so it must be set before they are used.
 	enc     RuneEncoder
 	// buf holds the encoded bytes.
 	buf     bytes.Buffer
 	// runeLen is the number of runes counted so far; Reset sets it back to 0.
 	runeLen runePos
 }

 // ByteLen returns the number of encoded bytes currently held in b.
 func (b *byteRuneBuffer) ByteLen() bytePos {
 	n := b.buf.Len()
 	return bytePos(n)
 }

 // RuneLen returns the number of runes counted in b so far.
 func (b *byteRuneBuffer) RuneLen() runePos {
 	return b.runeLen
 }

 // Bytes returns the encoded bytes held in b, exactly as the underlying buffer
 // exposes them; the slice is not copied.
 func (b *byteRuneBuffer) Bytes() []byte {
 	return b.buf.Bytes()
 }

 // Reset empties b: the rune count goes back to zero and the underlying byte
 // buffer is reset.  The encoder is left untouched.
 func (b *byteRuneBuffer) Reset() {
 	b.runeLen = 0
 	b.buf.Reset()
 }

 // WriteRune encodes r into b's byte buffer using b's encoder, then counts it
 // as one rune.
 func (b *byteRuneBuffer) WriteRune(r rune) {
 	dst := &b.buf
 	b.enc.Encode(r, dst)
 	b.runeLen++
 }

 // WriteString writes every rune of str into b, counting each one in the rune
 // length.  Ranging over str decodes it as UTF-8, so each invalid byte is
 // written as U+FFFD.  The loop body performs the same two steps as WriteRune.
 func (b *byteRuneBuffer) WriteString(str string) {
 	for _, r := range str {
 		b.enc.Encode(r, &b.buf)
 		b.runeLen++
 	}
 }

 // WriteString0Runes encodes every rune of str into b's byte buffer but leaves
 // the rune length untouched, so the written text adds bytes without adding to
 // the rune count.
 func (b *byteRuneBuffer) WriteString0Runes(str string) {
 	dst := &b.buf
 	for _, r := range str {
 		b.enc.Encode(r, dst)
 	}
 }
	// Copyright 2015 The Vanadium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package textutil

	import (
	"bytes"
	)

	// TODO(toddw): Add UTF16 support.

	// Special rune values defined by this package.
	//
	// EOF is rune(-1), which lies outside the valid Unicode code point range, so
	// it does not collide with any valid rune. RuneStreamDecoder.Next returns it
	// to signal the end of a stream.
	const (
	EOF = rune(-1) // Indicates the end of a rune stream.
	LineSeparator = '\u2028' // Unicode line separator rune.
	ParagraphSeparator = '\u2029' // Unicode paragraph separator rune.
	)

	// RuneEncoder is the interface to an encoder of a stream of runes into
	// bytes.Buffer. byteRuneBuffer calls Encode once for every rune it writes.
	type RuneEncoder interface {
	// Encode encodes r into buf. The method has no error result, so an
	// implementation cannot report an encoding failure through this interface.
	Encode(r rune, buf *bytes.Buffer)
	}

	// RuneStreamDecoder is the interface to a decoder of a contiguous stream of
	// runes. Callers such as RuneChunkWrite and RuneChunkFlush drain a stream by
	// calling Next until it returns EOF, or until their callback fails.
	type RuneStreamDecoder interface {
	// Next returns the next rune. Invalid encodings are returned as U+FFFD.
	// Returns EOF at the end of the stream.
	Next() rune
	// BytePos returns the current byte position in the original data buffer.
	// RuneChunkWrite returns this value as its processed-byte count.
	BytePos() int
	}

	// RuneChunkDecoder is the interface to a decoder of a stream of encoded runes
	// that may be arbitrarily chunked.
	//
	// Implementations of RuneChunkDecoder are commonly used to implement io.Writer
	// wrappers, to handle buffering when chunk boundaries may occur in the middle
	// of an encoded rune.
	type RuneChunkDecoder interface {
	// Decode returns a RuneStreamDecoder that decodes the data chunk. Call Next
	// repeatedly on the returned stream until it returns EOF to decode the chunk.
	// RuneChunkWrite is a helper that runs this loop.
	Decode(chunk []byte) RuneStreamDecoder
	// DecodeLeftover returns a RuneStreamDecoder that decodes leftover buffered
	// data. Call Next repeatedly on the returned stream until it returns EOF to
	// ensure all buffered data is processed. RuneChunkFlush is a helper that
	// runs this loop.
	DecodeLeftover() RuneStreamDecoder
	}

	// RuneChunkWrite feeds data to d.Decode and invokes fn once, in order, for
	// every rune the returned stream yields before EOF. The int result is the
	// stream's BytePos at the moment RuneChunkWrite returns, i.e. how far into
	// data the decoder had advanced. If fn returns an error, RuneChunkWrite stops
	// immediately and returns that error together with the byte position reached;
	// no further runes are decoded.
	//
	// This is a convenience for implementing io.Writer, given a RuneChunkDecoder.
	func RuneChunkWrite(d RuneChunkDecoder, fn func(rune) error, data []byte) (int, error) {
		runes := d.Decode(data)
		for {
			r := runes.Next()
			if r == EOF {
				// The chunk is fully drained.
				return runes.BytePos(), nil
			}
			if err := fn(r); err != nil {
				return runes.BytePos(), err
			}
		}
	}

	// RuneChunkFlush drains the stream returned by d.DecodeLeftover, invoking fn
	// once, in order, for every rune it yields before EOF. If fn returns an
	// error, RuneChunkFlush stops immediately and returns that error; otherwise it
	// returns nil once the stream reports EOF.
	//
	// This is a convenience for implementing an additional Flush() call on an
	// implementation of io.Writer, given a RuneChunkDecoder.
	func RuneChunkFlush(d RuneChunkDecoder, fn func(rune) error) error {
		leftover := d.DecodeLeftover()
		for {
			r := leftover.Next()
			if r == EOF {
				// All buffered data has been handed to fn.
				return nil
			}
			if err := fn(r); err != nil {
				return err
			}
		}
	}

	// bytePos and runePos distinguish positions that are used in either domain;
	// we're trying to avoid silly mistakes like adding a bytePos to a runePos.
	// Because they are distinct named types, mixing them in one expression does
	// not compile without an explicit conversion.
	//
	// bytePos counts bytes (see byteRuneBuffer.ByteLen); runePos counts runes (see
	// byteRuneBuffer.RuneLen).
	type bytePos int
	type runePos int

	// byteRuneBuffer maintains a buffer with both byte and rune based positions.
	// The byte length is read from buf itself, while the rune length is a separate
	// counter advanced by WriteRune (and by WriteString, which calls it).
	// WriteString0Runes adds bytes without advancing the counter.
	type byteRuneBuffer struct {
	// enc encodes each written rune into buf. The write methods call it
	// without a nil check, so it must be set before they are used.
	enc RuneEncoder
	// buf holds the encoded bytes.
	buf bytes.Buffer
	// runeLen is the number of runes counted so far; Reset sets it back to 0.
	runeLen runePos
	}

	// ByteLen returns the number of encoded bytes currently held in b.
	func (b *byteRuneBuffer) ByteLen() bytePos {
		n := b.buf.Len()
		return bytePos(n)
	}

	// RuneLen returns the number of runes counted in b so far.
	func (b *byteRuneBuffer) RuneLen() runePos {
		return b.runeLen
	}

	// Bytes returns the encoded bytes held in b, exactly as the underlying buffer
	// exposes them; the slice is not copied.
	func (b *byteRuneBuffer) Bytes() []byte {
		return b.buf.Bytes()
	}

	// Reset empties b: the rune count goes back to zero and the underlying byte
	// buffer is reset. The encoder is left untouched.
	func (b *byteRuneBuffer) Reset() {
		b.runeLen = 0
		b.buf.Reset()
	}

	// WriteRune encodes r into b's byte buffer using b's encoder, then counts it
	// as one rune.
	func (b *byteRuneBuffer) WriteRune(r rune) {
		dst := &b.buf
		b.enc.Encode(r, dst)
		b.runeLen++
	}

	// WriteString writes every rune of str into b, counting each one in the rune
	// length. Ranging over str decodes it as UTF-8, so each invalid byte is
	// written as U+FFFD. The loop body performs the same two steps as WriteRune.
	func (b *byteRuneBuffer) WriteString(str string) {
		for _, r := range str {
			b.enc.Encode(r, &b.buf)
			b.runeLen++
		}
	}

	// WriteString0Runes encodes every rune of str into b's byte buffer but leaves
	// the rune length untouched, so the written text adds bytes without adding to
	// the rune count.
	func (b *byteRuneBuffer) WriteString0Runes(str string) {
		dst := &b.buf
		for _, r := range str {
			b.enc.Encode(r, dst)
		}
	}