Skip to content

Commit 029306d

Browse files
Jeremyyang920 authored and scritchley committed
Add Column Statistics (#55)
1 parent 6efb618 commit 029306d

File tree

1 file changed

+72
-0
lines changed

1 file changed

+72
-0
lines changed

columnstatistics.go

+72
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package orc
22

33
import (
4+
"time"
5+
46
"github.com/scritchley/orc/proto"
57
)
68

@@ -14,6 +16,8 @@ func NewColumnStatistics(category Category) ColumnStatistics {
1416
return NewStringStatistics()
1517
case CategoryBoolean:
1618
return NewBucketStatistics()
19+
case CategoryTimestamp:
20+
return NewTimestampStatistics()
1721
default:
1822
return NewBaseStatistics()
1923
}
@@ -235,3 +239,71 @@ func NewBucketStatistics() *BucketStatistics {
235239
// }
236240
// b.BaseStatistics.Add(value)
237241
// }
242+
243+
type TimestampStatistics struct {
244+
BaseStatistics
245+
minSet bool
246+
}
247+
248+
func NewTimestampStatistics() *TimestampStatistics {
249+
base := NewBaseStatistics()
250+
var max, min, maxUTC, minUTC int64
251+
252+
base.TimestampStatistics = &proto.TimestampStatistics{
253+
Maximum: &max,
254+
Minimum: &min,
255+
MaximumUtc: &maxUTC,
256+
MinimumUtc: &minUTC,
257+
}
258+
return &TimestampStatistics{
259+
BaseStatistics: base,
260+
}
261+
}
262+
263+
func (i *TimestampStatistics) Merge(other ColumnStatistics) {
264+
if is, ok := other.(*TimestampStatistics); ok {
265+
if is.TimestampStatistics.GetMaximum() > i.TimestampStatistics.GetMaximum() {
266+
i.TimestampStatistics.Maximum = is.TimestampStatistics.Maximum
267+
i.TimestampStatistics.MaximumUtc = is.TimestampStatistics.MaximumUtc
268+
}
269+
if is.TimestampStatistics.GetMinimum() < i.TimestampStatistics.GetMinimum() {
270+
i.TimestampStatistics.Minimum = is.TimestampStatistics.Minimum
271+
i.TimestampStatistics.MinimumUtc = is.TimestampStatistics.MinimumUtc
272+
}
273+
i.BaseStatistics.Merge(is.BaseStatistics)
274+
}
275+
}
276+
277+
func (i *TimestampStatistics) Add(value interface{}) {
278+
if val, ok := value.(time.Time); ok {
279+
if i.TimestampStatistics.Maximum == nil {
280+
valCopy := val.Unix()
281+
valUTCCopy := val.UTC().Unix()
282+
i.TimestampStatistics.Maximum = &valCopy
283+
i.TimestampStatistics.MaximumUtc = &valUTCCopy
284+
} else if val.After(time.Unix(i.TimestampStatistics.GetMaximum(), 0)) {
285+
*i.TimestampStatistics.Maximum = val.Unix()
286+
*i.TimestampStatistics.MaximumUtc = val.UTC().Unix()
287+
}
288+
if !i.minSet {
289+
valCopy := val.Unix()
290+
valUTCCopy := val.UTC().Unix()
291+
i.TimestampStatistics.Minimum = &valCopy
292+
i.TimestampStatistics.MinimumUtc = &valUTCCopy
293+
i.minSet = true
294+
} else if val.Before(time.Unix(i.TimestampStatistics.GetMinimum(), 0)) {
295+
*i.TimestampStatistics.Minimum = val.Unix()
296+
*i.TimestampStatistics.MinimumUtc = val.UTC().Unix()
297+
298+
}
299+
}
300+
i.BaseStatistics.Add(value)
301+
}
302+
303+
func (i *TimestampStatistics) Statistics() *proto.ColumnStatistics {
304+
return i.ColumnStatistics
305+
}
306+
307+
func (i *TimestampStatistics) Reset() {
308+
*i = *NewTimestampStatistics()
309+
}

0 commit comments

Comments
 (0)