ergo/irc/history/history.go

464 lines
11 KiB
Go

// Copyright (c) 2018 Shivaram Lingamneni <slingamn@cs.stanford.edu>
// released under the MIT license
package history
import (
"github.com/oragono/oragono/irc/utils"
"sync"
"time"
)
type ItemType uint
const (
uninitializedItem ItemType = iota
Privmsg
Notice
Join
Part
Kick
Quit
Mode
Tagmsg
Nick
Topic
)
const (
initialAutoSize = 32
)
// a Tagmsg that consists entirely of transient tags is not stored
var transientTags = map[string]bool{
"+draft/typing": true,
"+typing": true, // future-proofing
}
// Item represents an event (e.g., a PRIVMSG or a JOIN) and its associated data
type Item struct {
Type ItemType
Nick string
// this is the uncasefolded account name, if there's no account it should be set to "*"
AccountName string
// for non-privmsg items, we may stuff some other data in here
Message utils.SplitMessage
Tags map[string]string
Params [1]string
// for a DM, this is the casefolded nickname of the other party (whether this is
// an incoming or outgoing message). this lets us emulate the "query buffer" functionality
// required by CHATHISTORY:
CfCorrespondent string
}
// HasMsgid tests whether a message has the message id `msgid`.
func (item *Item) HasMsgid(msgid string) bool {
return item.Message.Msgid == msgid
}
func (item *Item) IsStorable() bool {
switch item.Type {
case Tagmsg:
for name := range item.Tags {
if !transientTags[name] {
return true
}
}
return false // all tags were blacklisted
case Privmsg, Notice:
// don't store CTCP other than ACTION
return !item.Message.IsRestrictedCTCPMessage()
default:
return true
}
}
type Predicate func(item *Item) (matches bool)
func Reverse(results []Item) {
for i, j := 0, len(results)-1; i < j; i, j = i+1, j-1 {
results[i], results[j] = results[j], results[i]
}
}
// Buffer is a ring buffer holding message/event history for a channel or user
type Buffer struct {
sync.RWMutex
// ring buffer, see irc/whowas.go for conventions
buffer []Item
start int
end int
maximumSize int
window time.Duration
lastDiscarded time.Time
nowFunc func() time.Time
}
func NewHistoryBuffer(size int, window time.Duration) (result *Buffer) {
result = new(Buffer)
result.Initialize(size, window)
return
}
func (hist *Buffer) Initialize(size int, window time.Duration) {
hist.buffer = make([]Item, hist.initialSize(size, window))
hist.start = -1
hist.end = -1
hist.window = window
hist.maximumSize = size
hist.nowFunc = time.Now
}
// compute the initial size for the buffer, taking into account autoresize
func (hist *Buffer) initialSize(size int, window time.Duration) (result int) {
result = size
if window != 0 {
result = initialAutoSize
if size < result {
result = size // min(initialAutoSize, size)
}
}
return
}
// Add adds a history item to the buffer
func (list *Buffer) Add(item Item) {
if item.Message.Time.IsZero() {
item.Message.Time = time.Now().UTC()
}
list.Lock()
defer list.Unlock()
if len(list.buffer) == 0 {
return
}
list.maybeExpand()
var pos int
if list.start == -1 { // empty
pos = 0
list.start = 0
list.end = 1 % len(list.buffer)
} else if list.start != list.end { // partially full
pos = list.end
list.end = (list.end + 1) % len(list.buffer)
} else if list.start == list.end { // full
pos = list.end
list.end = (list.end + 1) % len(list.buffer)
list.start = list.end // advance start as well, overwriting first entry
// record the timestamp of the overwritten item
if list.lastDiscarded.Before(list.buffer[pos].Message.Time) {
list.lastDiscarded = list.buffer[pos].Message.Time
}
}
list.buffer[pos] = item
}
func (list *Buffer) lookup(msgid string) (result Item, found bool) {
predicate := func(item *Item) bool {
return item.HasMsgid(msgid)
}
results := list.matchInternal(predicate, false, 1)
if len(results) != 0 {
return results[0], true
}
return
}
// Between returns all history items with a time `after` <= time <= `before`,
// with an indication of whether the results are complete or are missing items
// because some of that period was discarded. A zero value of `before` is considered
// higher than all other times.
func (list *Buffer) betweenHelper(start, end Selector, cutoff time.Time, pred Predicate, limit int) (results []Item, complete bool, err error) {
var ascending bool
defer func() {
if !ascending {
Reverse(results)
}
}()
list.RLock()
defer list.RUnlock()
if len(list.buffer) == 0 {
return
}
after := start.Time
if start.Msgid != "" {
item, found := list.lookup(start.Msgid)
if !found {
return
}
after = item.Message.Time
}
before := end.Time
if end.Msgid != "" {
item, found := list.lookup(end.Msgid)
if !found {
return
}
before = item.Message.Time
}
after, before, ascending = MinMaxAsc(after, before, cutoff)
complete = after.Equal(list.lastDiscarded) || after.After(list.lastDiscarded)
satisfies := func(item *Item) bool {
return (after.IsZero() || item.Message.Time.After(after)) &&
(before.IsZero() || item.Message.Time.Before(before)) &&
(pred == nil || pred(item))
}
return list.matchInternal(satisfies, ascending, limit), complete, nil
}
// implements history.Sequence, emulating a single history buffer (for a channel,
// a single user's DMs, or a DM conversation)
type bufferSequence struct {
list *Buffer
pred Predicate
cutoff time.Time
}
func (list *Buffer) MakeSequence(correspondent string, cutoff time.Time) Sequence {
var pred Predicate
if correspondent != "" {
pred = func(item *Item) bool {
return item.CfCorrespondent == correspondent
}
}
return &bufferSequence{
list: list,
pred: pred,
cutoff: cutoff,
}
}
func (seq *bufferSequence) Between(start, end Selector, limit int) (results []Item, complete bool, err error) {
return seq.list.betweenHelper(start, end, seq.cutoff, seq.pred, limit)
}
func (seq *bufferSequence) Around(start Selector, limit int) (results []Item, err error) {
return GenericAround(seq, start, limit)
}
// you must be holding the read lock to call this
func (list *Buffer) matchInternal(predicate Predicate, ascending bool, limit int) (results []Item) {
if list.start == -1 || len(list.buffer) == 0 {
return
}
var pos, stop int
if ascending {
pos = list.start
stop = list.prev(list.end)
} else {
pos = list.prev(list.end)
stop = list.start
}
for {
if predicate(&list.buffer[pos]) {
results = append(results, list.buffer[pos])
}
if pos == stop || (limit != 0 && len(results) == limit) {
break
}
if ascending {
pos = list.next(pos)
} else {
pos = list.prev(pos)
}
}
return
}
// Delete deletes messages matching some predicate.
func (list *Buffer) Delete(predicate Predicate) (count int) {
list.Lock()
defer list.Unlock()
if list.start == -1 || len(list.buffer) == 0 {
return
}
pos := list.start
stop := list.prev(list.end)
for {
if predicate(&list.buffer[pos]) {
list.buffer[pos] = Item{}
count++
}
if pos == stop {
break
}
pos = list.next(pos)
}
return
}
// latest returns the items most recently added, up to `limit`. If `limit` is 0,
// it returns all items.
func (list *Buffer) latest(limit int) (results []Item) {
results, _, _ = list.betweenHelper(Selector{}, Selector{}, time.Time{}, nil, limit)
return
}
// LastDiscarded returns the latest time of any entry that was evicted
// from the ring buffer.
func (list *Buffer) LastDiscarded() time.Time {
list.RLock()
defer list.RUnlock()
return list.lastDiscarded
}
func (list *Buffer) prev(index int) int {
switch index {
case 0:
return len(list.buffer) - 1
default:
return index - 1
}
}
func (list *Buffer) next(index int) int {
switch index {
case len(list.buffer) - 1:
return 0
default:
return index + 1
}
}
// return n such that v <= n and n == 2**i for some i
func roundUpToPowerOfTwo(v int) int {
// http://graphics.stanford.edu/~seander/bithacks.html
v -= 1
v |= v >> 1
v |= v >> 2
v |= v >> 4
v |= v >> 8
v |= v >> 16
return v + 1
}
func (list *Buffer) maybeExpand() {
if list.window == 0 {
return // autoresize is disabled
}
length := list.length()
if length < len(list.buffer) {
return // we have spare capacity already
}
if len(list.buffer) == list.maximumSize {
return // cannot expand any further
}
wouldDiscard := list.buffer[list.start].Message.Time
if list.window < list.nowFunc().Sub(wouldDiscard) {
return // oldest element is old enough to overwrite
}
newSize := roundUpToPowerOfTwo(length + 1)
if list.maximumSize < newSize {
newSize = list.maximumSize
}
list.resize(newSize)
}
// Resize shrinks or expands the buffer
func (list *Buffer) Resize(maximumSize int, window time.Duration) {
list.Lock()
defer list.Unlock()
if list.maximumSize == maximumSize && list.window == window {
return // no-op
}
list.maximumSize = maximumSize
list.window = window
// three cases where we need to preemptively resize:
// (1) we are not autoresizing
// (2) the buffer is currently larger than maximumSize and needs to be shrunk
// (3) the buffer is currently smaller than the recommended initial size
// (including the case where it is currently disabled and needs to be enabled)
// TODO make it possible to shrink the buffer so that it only contains `window`
if window == 0 || maximumSize < len(list.buffer) {
list.resize(maximumSize)
} else {
initialSize := list.initialSize(maximumSize, window)
if len(list.buffer) < initialSize {
list.resize(initialSize)
}
}
}
func (list *Buffer) resize(size int) {
newbuffer := make([]Item, size)
if list.start == -1 {
// indices are already correct and nothing needs to be copied
} else if size == 0 {
// this is now the empty list
list.start = -1
list.end = -1
} else {
currentLength := list.length()
start := list.start
end := list.end
// if we're truncating, keep the latest entries, not the earliest
if size < currentLength {
start = list.end - size
if start < 0 {
start += len(list.buffer)
}
// update lastDiscarded for discarded entries
for i := list.start; i != start; i = (i + 1) % len(list.buffer) {
if list.lastDiscarded.Before(list.buffer[i].Message.Time) {
list.lastDiscarded = list.buffer[i].Message.Time
}
}
}
if start < end {
copied := copy(newbuffer, list.buffer[start:end])
list.start = 0
list.end = copied % size
} else {
lenInitial := len(list.buffer) - start
copied := copy(newbuffer, list.buffer[start:])
copied += copy(newbuffer[lenInitial:], list.buffer[:end])
list.start = 0
list.end = copied % size
}
}
list.buffer = newbuffer
}
func (hist *Buffer) length() int {
if hist.start == -1 {
return 0
} else if hist.start < hist.end {
return hist.end - hist.start
} else {
return len(hist.buffer) - (hist.start - hist.end)
}
}