|
1 | 1 | package orc
|
2 | 2 |
|
3 | 3 | import (
|
4 |
| - "bufio" |
5 | 4 | "bytes"
|
| 5 | + "fmt" |
| 6 | + "io" |
| 7 | + "sync" |
6 | 8 | )
|
7 | 9 |
|
8 |
| -// BufferedWriter wraps a *bufio.Writer and records the current |
9 |
| -// position of the writer prior to flushing to the underlying |
10 |
| -// writer. |
11 | 10 | type BufferedWriter struct {
|
12 |
| - *bufio.Writer |
13 |
| - *bytes.Buffer |
14 |
| - codec CompressionCodec |
15 |
| - checkpoint uint64 |
16 |
| - written uint64 |
| 11 | + uncompressedBuffer *bytes.Buffer |
| 12 | + encodedBuffer *bytes.Buffer |
| 13 | + codec CompressionCodec |
| 14 | + chunkSize int |
| 15 | + checkpoint uint64 |
| 16 | + written uint64 |
| 17 | + sync.Mutex |
17 | 18 | }
|
18 | 19 |
|
19 | 20 | // NewBufferedWriter returns a new BufferedWriter using the provided
|
20 | 21 | // CompressionCodec.
|
21 | 22 | func NewBufferedWriter(codec CompressionCodec) *BufferedWriter {
|
22 |
| - buf := &bytes.Buffer{} |
| 23 | + chunkSize := 1024 |
| 24 | + switch codec.(type) { |
| 25 | + case CompressionNone: |
| 26 | + chunkSize = 1 |
| 27 | + case CompressionZlib: |
| 28 | + chunkSize = int(DefaultCompressionChunkSize) |
| 29 | + } |
23 | 30 | return &BufferedWriter{
|
24 |
| - codec: codec, |
25 |
| - Writer: bufio.NewWriterSize( |
26 |
| - codec.Encoder(buf), |
27 |
| - int(DefaultCompressionChunkSize), |
28 |
| - ), |
29 |
| - Buffer: buf, |
| 31 | + codec: codec, |
| 32 | + uncompressedBuffer: &bytes.Buffer{}, |
| 33 | + encodedBuffer: &bytes.Buffer{}, |
| 34 | + chunkSize: chunkSize, |
30 | 35 | }
|
31 | 36 | }
|
32 | 37 |
|
33 |
| -// WriteByte writes a byte to the underlying buffer an increments the total |
34 |
| -// number of bytes written. |
| 38 | +// WriteByte writes a byte to the underlying buffer. |
| 39 | +// If the desired chunk size is reached, the buffer is compressed |
35 | 40 | func (b *BufferedWriter) WriteByte(c byte) error {
|
36 |
| - b.written++ |
37 |
| - return b.Writer.WriteByte(c) |
| 41 | + b.Lock() |
| 42 | + defer b.Unlock() |
| 43 | + |
| 44 | + if b.uncompressedBuffer.Len() == b.chunkSize { |
| 45 | + err := b.spill() |
| 46 | + if err != nil { |
| 47 | + return err |
| 48 | + } |
| 49 | + } |
| 50 | + _, err := b.uncompressedBuffer.Write([]byte{c}) |
| 51 | + return err |
38 | 52 | }
|
39 | 53 |
|
40 |
| -// Write writes the provided byte slice to the underlying buffer an increments |
41 |
| -// the total number of bytes written. |
| 54 | +// Write writes the provided byte slice to the underlying buffer. |
| 55 | +// If the desired chunk size is reached, the buffer is compressed |
42 | 56 | func (b *BufferedWriter) Write(p []byte) (int, error) {
|
43 |
| - b.written += uint64(len(p)) |
44 |
| - return b.Writer.Write(p) |
| 57 | + b.Lock() |
| 58 | + defer b.Unlock() |
| 59 | + pos := 0 |
| 60 | + |
| 61 | + var remaining int |
| 62 | + l := len(p) |
| 63 | + c := b.chunkSize - b.uncompressedBuffer.Len() |
| 64 | + if c > l { |
| 65 | + remaining = l |
| 66 | + } else { |
| 67 | + remaining = c |
| 68 | + } |
| 69 | + |
| 70 | + n, err := b.uncompressedBuffer.Write(p[pos : pos+remaining]) |
| 71 | + if err != nil { |
| 72 | + return 0, err |
| 73 | + } |
| 74 | + pos += n |
| 75 | + l -= n |
| 76 | + |
| 77 | + for l != 0 { |
| 78 | + if err := b.spill(); err != nil { |
| 79 | + return 0, err |
| 80 | + } |
| 81 | + |
| 82 | + c = b.chunkSize - b.uncompressedBuffer.Len() |
| 83 | + if c > l { |
| 84 | + remaining = l |
| 85 | + } else { |
| 86 | + remaining = c |
| 87 | + } |
| 88 | + |
| 89 | + n, err = b.uncompressedBuffer.Write(p[pos : pos+remaining]) |
| 90 | + if err != nil { |
| 91 | + return 0, err |
| 92 | + } |
| 93 | + pos += n |
| 94 | + l -= n |
| 95 | + } |
| 96 | + |
| 97 | + return pos, nil |
| 98 | +} |
| 99 | + |
| 100 | +// spill to the encoder to handle the compression and update the number of |
| 101 | +// written bytes to the encoded buffer |
| 102 | +func (b *BufferedWriter) spill() error { |
| 103 | + encoder := b.codec.Encoder(b.encodedBuffer) |
| 104 | + l := b.uncompressedBuffer.Len() |
| 105 | + n, err := io.Copy(encoder, b.uncompressedBuffer) |
| 106 | + if err != nil { |
| 107 | + return err |
| 108 | + } |
| 109 | + if int(n) != l { |
| 110 | + return fmt.Errorf("Expected to write %d bytes, wrote %d", l, n) |
| 111 | + } |
| 112 | + |
| 113 | + err = encoder.Close() |
| 114 | + if err != nil { |
| 115 | + return err |
| 116 | + } |
| 117 | + b.written += uint64(n) |
| 118 | + return nil |
45 | 119 | }
|
46 | 120 |
|
47 | 121 | func (b *BufferedWriter) Positions() []uint64 {
|
| 122 | + b.Lock() |
| 123 | + defer b.Unlock() |
| 124 | + |
| 125 | + //TODO: Do we still need the checkpoint? |
48 | 126 | switch b.codec.(type) {
|
49 | 127 | case CompressionNone:
|
50 | 128 | checkpoint := b.checkpoint
|
51 | 129 | b.checkpoint = b.written
|
52 | 130 | return []uint64{checkpoint}
|
53 | 131 | default:
|
54 |
| - return nil |
| 132 | + //TODO: check if this is correct |
| 133 | + checkpoint := b.checkpoint |
| 134 | + b.checkpoint = b.written |
| 135 | + return []uint64{checkpoint} |
| 136 | + |
| 137 | + // return nil |
55 | 138 | }
|
56 | 139 | }
|
57 | 140 |
|
| 141 | +func (b *BufferedWriter) Flush() error { |
| 142 | + b.Lock() |
| 143 | + defer b.Unlock() |
| 144 | + |
| 145 | + return b.spill() |
| 146 | +} |
| 147 | + |
| 148 | +func (b *BufferedWriter) Read(p []byte) (int, error) { |
| 149 | + b.Lock() |
| 150 | + defer b.Unlock() |
| 151 | + |
| 152 | + return b.encodedBuffer.Read(p) |
| 153 | +} |
| 154 | + |
| 155 | +func (b *BufferedWriter) Len() int { |
| 156 | + b.Lock() |
| 157 | + defer b.Unlock() |
| 158 | + |
| 159 | + return b.encodedBuffer.Len() |
| 160 | +} |
| 161 | + |
58 | 162 | // Close flushes any buffered bytes to the underlying writer.
|
59 | 163 | func (b *BufferedWriter) Close() error {
|
60 |
| - return b.Writer.Flush() |
| 164 | + return b.spill() |
61 | 165 | }
|
62 | 166 |
|
63 |
| -// Reset resets the underlying bytes.Buffer. |
| 167 | +// Reset resets the underlying encoded buffer |
64 | 168 | func (b *BufferedWriter) Reset() {
|
65 |
| - b.Buffer.Reset() |
| 169 | + b.Lock() |
| 170 | + defer b.Unlock() |
| 171 | + |
| 172 | + b.encodedBuffer.Reset() |
66 | 173 | }
|
0 commit comments