commit 4f291c93e5e3a44aa58da16f64d69978be8ff1bf
Author: tijl
Date:   Wed Aug 6 10:23:12 2025 +0200

    first commit

diff --git a/client.go b/client.go
new file mode 100644
index 0000000..c7e67c7
--- /dev/null
+++ b/client.go
@@ -0,0 +1,84 @@
+// Package shortify implements a small URL shortener: short IDs are drawn from
+// a pre-generated pool, mappings are persisted in bbolt behind an in-memory
+// cache, and visits are logged to DuckDB.
+package shortify
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"io"
+	"net"
+	"net/http"
+	"time"
+)
+
+// ClientConfig selects how a Client reaches a shortify server.
+type ClientConfig struct {
+	UseUnixSocket bool   // true = unix socket, false = http
+	SocketPath    string // e.g. /tmp/shorty.sock
+	HTTPAddress   string // e.g. http://localhost:8080
+	Timeout       time.Duration
+}
+
+// Client sends shorten requests to a shortify server.
+type Client struct {
+	httpClient *http.Client
+	baseURL    string
+}
+
+// NewClient builds a Client from cfg. With UseUnixSocket set, every request is
+// dialled over cfg.SocketPath and cfg.HTTPAddress is ignored. The returned
+// error is currently always nil.
+func NewClient(cfg ClientConfig) (*Client, error) {
+	transport := &http.Transport{}
+	baseURL := cfg.HTTPAddress
+
+	if cfg.UseUnixSocket {
+		socketPath := cfg.SocketPath
+		transport.DialContext = func(ctx context.Context, _, _ string) (net.Conn, error) {
+			// Dial with the request context so a cancelled or timed-out
+			// request also aborts a pending connection attempt.
+			var d net.Dialer
+			return d.DialContext(ctx, "unix", socketPath)
+		}
+		baseURL = "http://unix" // dummy for request building
+	}
+
+	return &Client{
+		httpClient: &http.Client{
+			Transport: transport,
+			Timeout:   cfg.Timeout,
+		},
+		baseURL: baseURL,
+	}, nil
+}
+
+// Shorten POSTs url as the raw request body to <base>/s and returns the
+// response body, which the server fills with the short ID. A non-200 status
+// is reported as an error carrying the response body.
+func (c *Client) Shorten(url string) (string, error) {
+	req, err := http.NewRequest(http.MethodPost, c.baseURL+"/s", bytes.NewReader([]byte(url)))
+	if err != nil {
+		return "", err
+	}
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		// Best effort: the body is only used to enrich the error message.
+		b, _ := io.ReadAll(resp.Body)
+		return "", errors.New("shorten failed: " + string(b))
+	}
+
+	result, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", err
+	}
+
+	return string(result), nil
+}
diff --git a/examples/main.go b/examples/main.go
new file mode 100644
index 0000000..3859832
--- /dev/null
+++ b/examples/main.go
@@ -0,0 +1,23 @@
+package main
+
+import (
+	"git.tijl.dev/tijl/shortify"
+)
+
+// main starts a shortener that serves the write API on a unix socket and
+// redirects over HTTP on port 3001.
+func main() {
+	s, err := shortify.NewShortener(shortify.Config{
+		DataFolder:    "./shortify",
+		CacheSize:     10000,
+		AccessLogSize: 10000,
+	})
+	if err != nil {
+		panic(err)
+	}
+
+	// Keep the socket inside the data folder, which NewShortener creates;
+	// listening in a directory that does not exist would abort the process.
+	go s.ServeSocket("./shortify/shortify.sock")
+	s.ServeHTTP("0.0.0.0:3001")
+}
diff --git a/generation.go b/generation.go
new file mode 100644
index 0000000..fb870d5
--- /dev/null
+++ b/generation.go
@@ -0,0 +1,321 @@
+package shortify
+
+import (
+	"crypto/rand"
+	"errors"
+	"log"
+	"math/big"
+	"sync"
+	"time"
+
+	"go.etcd.io/bbolt"
+)
+
+// IDPool hands out pre-generated random short IDs. The IDs live in the
+// "idpool" bbolt bucket and are mirrored in memory; PopID serves from memory
+// while background goroutines refill the bucket and delete IDs that were
+// handed out.
+type IDPool struct {
+	db           *bbolt.DB
+	mu           sync.Mutex // guards inMemoryPool and pending
+	idLen        int
+	poolCap      int
+	regenThresh  int
+	inMemoryPool []string
+	pending      map[string]struct{} // handed out, possibly still in the bucket
+	cond         *sync.Cond
+	stopCh       chan struct{}
+	stopOnce     sync.Once
+	usedChan     chan string
+}
+
+// NewIDPool opens (creating if needed) the ID bucket in db, loads the stored
+// IDs, generates a first batch when none exist, and starts the background
+// refill and delete goroutines. idLen is the length of each ID, poolCap the
+// number of IDs kept in the bucket, and regenThresh the in-memory level below
+// which a refill is triggered.
+func NewIDPool(db *bbolt.DB, idLen int, poolCap int, regenThresh int) (*IDPool, error) {
+	p := &IDPool{
+		db:          db,
+		idLen:       idLen,
+		poolCap:     poolCap,
+		regenThresh: regenThresh,
+		pending:     make(map[string]struct{}),
+		stopCh:      make(chan struct{}),
+		usedChan:    make(chan string, 1000),
+	}
+	p.cond = sync.NewCond(&p.mu)
+
+	err := db.Update(func(tx *bbolt.Tx) error {
+		_, err := tx.CreateBucketIfNotExists(idpoolBucket)
+		return err
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	if err := p.loadFromDB(); err != nil {
+		return nil, err
+	}
+
+	if len(p.inMemoryPool) == 0 {
+		// idpool empty at startup, generating initial batch...
+		if err := p.GenerateBatch(); err != nil {
+			return nil, err
+		}
+		if err := p.loadFromDB(); err != nil {
+			return nil, err
+		}
+	}
+
+	go p.backgroundGenerator()
+	go p.flushUsedIDs()
+
+	return p, nil
+}
+
+// loadFromDB replaces the in-memory pool with the IDs stored in the bucket,
+// leaving out IDs that were already handed out but whose delete has not
+// reached the bucket yet; reloading those would issue the same ID twice.
+func (p *IDPool) loadFromDB() error {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	var ids []string
+
+	err := p.db.View(func(tx *bbolt.Tx) error {
+		b := tx.Bucket(idpoolBucket)
+		if b == nil {
+			return nil
+		}
+
+		c := b.Cursor()
+		for k, _ := c.First(); k != nil; k, _ = c.Next() {
+			id := string(k)
+			if _, used := p.pending[id]; used {
+				continue
+			}
+			ids = append(ids, id)
+		}
+		return nil
+	})
+	if err != nil {
+		return err
+	}
+
+	p.inMemoryPool = ids
+	return nil
+}
+
+// GenerateBatch tops the bucket up to poolCap IDs in a single write
+// transaction, skipping candidates that are already pooled or already used as
+// a short URL. The in-memory pool is not touched; call loadFromDB afterwards.
+func (p *IDPool) GenerateBatch() error {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	return p.db.Update(func(tx *bbolt.Tx) error {
+		b, err := tx.CreateBucketIfNotExists(idpoolBucket)
+		if err != nil {
+			return err
+		}
+		urls := tx.Bucket(bucketName) // nil until the first URL is persisted
+
+		count := b.Stats().KeyN
+		for count < p.poolCap {
+			id, err := generateID(p.idLen)
+			if err != nil {
+				return err
+			}
+			key := []byte(id)
+			if b.Get(key) != nil {
+				continue
+			}
+			if urls != nil && urls.Get(key) != nil {
+				continue
+			}
+			if err := b.Put(key, []byte{}); err != nil {
+				return err
+			}
+			count++
+		}
+		return nil
+	})
+}
+
+// PopID returns an ID from memory and queues it for async DB removal. It
+// returns an error when the in-memory pool is empty.
+func (p *IDPool) PopID() (string, error) {
+	p.mu.Lock()
+	if len(p.inMemoryPool) == 0 {
+		p.cond.Signal() // make sure a refill is under way
+		p.mu.Unlock()
+		return "", errors.New("id pool empty")
+	}
+
+	// Fast O(1) pop
+	id := p.inMemoryPool[0]
+	p.inMemoryPool = p.inMemoryPool[1:]
+	// Remember the ID until its delete is committed so a reload cannot hand
+	// it out a second time.
+	p.pending[id] = struct{}{}
+
+	// Signal for batch regen if low
+	if len(p.inMemoryPool) < p.regenThresh {
+		p.cond.Signal()
+	}
+	p.mu.Unlock()
+
+	// Queue for async delete
+	select {
+	case p.usedChan <- id:
+	default:
+		// Queue full: delete right away instead of dropping the delete, which
+		// would leave the ID in the bucket to be issued again after a restart.
+		if err := p.deleteID(id); err != nil {
+			log.Printf("Failed to delete used ID %s: %v\n", id, err)
+		}
+	}
+
+	return id, nil
+}
+
+// deleteID removes id from the bucket and, once that is committed, from the
+// pending set. On error the ID stays pending and is not reissued by this
+// process.
+func (p *IDPool) deleteID(id string) error {
+	err := p.db.Update(func(tx *bbolt.Tx) error {
+		b := tx.Bucket(idpoolBucket)
+		if b == nil {
+			return nil
+		}
+		return b.Delete([]byte(id))
+	})
+	if err != nil {
+		return err
+	}
+
+	p.mu.Lock()
+	delete(p.pending, id)
+	p.mu.Unlock()
+	return nil
+}
+
+// stopped reports whether Stop has been called.
+func (p *IDPool) stopped() bool {
+	select {
+	case <-p.stopCh:
+		return true
+	default:
+		return false
+	}
+}
+
+// pause waits for d and reports false if the pool was stopped meanwhile.
+func (p *IDPool) pause(d time.Duration) bool {
+	t := time.NewTimer(d)
+	defer t.Stop()
+	select {
+	case <-p.stopCh:
+		return false
+	case <-t.C:
+		return true
+	}
+}
+
+// backgroundGenerator refills the pool whenever PopID signals that it has
+// dropped below regenThresh. It returns after Stop is called.
+func (p *IDPool) backgroundGenerator() {
+	for {
+		p.mu.Lock()
+		for !p.stopped() && len(p.inMemoryPool) >= p.regenThresh {
+			p.cond.Wait()
+		}
+		p.mu.Unlock()
+
+		if p.stopped() {
+			return
+		}
+
+		// generating batch
+		if err := p.GenerateBatch(); err != nil {
+			log.Printf("Failed to generate ID batch: %v\n", err)
+			if !p.pause(5 * time.Second) {
+				return
+			}
+			continue
+		}
+		if err := p.loadFromDB(); err != nil {
+			log.Printf("Failed to load ID pool: %v\n", err)
+		}
+
+		p.mu.Lock()
+		low := len(p.inMemoryPool) < p.regenThresh
+		p.mu.Unlock()
+		if low {
+			// Still below the threshold (failed reload, or IDs awaiting
+			// delete still count towards poolCap): back off instead of
+			// spinning.
+			if !p.pause(100 * time.Millisecond) {
+				return
+			}
+		}
+	}
+}
+
+// flushUsedIDs deletes handed-out IDs from the bucket as PopID queues them.
+// It returns after Stop is called.
+func (p *IDPool) flushUsedIDs() {
+	for {
+		select {
+		case <-p.stopCh:
+			return
+		case id := <-p.usedChan:
+			if err := p.deleteID(id); err != nil {
+				log.Printf("Failed to delete used ID %s: %v\n", id, err)
+			}
+		}
+	}
+}
+
+// Stop ends the background goroutines and deletes the IDs still queued for
+// removal. It is safe to call more than once.
+func (p *IDPool) Stop() {
+	p.stopOnce.Do(func() { close(p.stopCh) })
+
+	// Wake the generator, which may be parked on the condition variable and
+	// would otherwise never notice the stop.
+	p.mu.Lock()
+	p.cond.Broadcast()
+	p.mu.Unlock()
+
+	// Drain and flush remaining used IDs
+	for {
+		select {
+		case id := <-p.usedChan:
+			if err := p.deleteID(id); err != nil {
+				log.Printf("Failed to delete used ID %s: %v\n", id, err)
+			}
+		default:
+			return
+		}
+	}
+}
+
+var idpoolBucket = []byte("idpool")
+var base62 = []rune("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
+
+// generateID returns n characters drawn uniformly from base62 using
+// crypto/rand.
+func generateID(n int) (string, error) {
+	limit := big.NewInt(int64(len(base62))) // loop-invariant
+	id := make([]rune, n)
+	for i := range id {
+		num, err := rand.Int(rand.Reader, limit)
+		if err != nil {
+			return "", err
+		}
+		id[i] = base62[num.Int64()]
+	}
+	return string(id), nil
+}
diff --git a/global.go b/global.go
new file mode 100644
index 0000000..f5543ec
--- /dev/null
+++ b/global.go
@@ -0,0 +1,40 @@
+package shortify
+
+import (
+	"sync"
+
+	"go.etcd.io/bbolt"
+)
+
+// Process-wide Shortener, set by Init.
+var (
+	Global  *Shortener
+	once    sync.Once
+	initErr error // result of the one NewShortener call made by Init
+)
+
+// Init creates Global from cfg on the first call; later calls ignore cfg.
+// Every call returns the outcome of that first attempt, so a failed
+// initialisation is never reported as success.
+func Init(cfg Config) error {
+	once.Do(func() {
+		Global, initErr = NewShortener(cfg)
+	})
+	return initErr
+}
+
+// Process-wide IDPool, set by InitIDPool.
+var (
+	GlobalIDPool *IDPool
+	oncePopper   sync.Once
+	initPoolErr  error // result of the one NewIDPool call made by InitIDPool
+)
+
+// InitIDPool creates GlobalIDPool on the first call; later calls ignore their
+// arguments and return the outcome of that first attempt.
+func InitIDPool(db *bbolt.DB, idLen, poolCap, regenThresh int) error {
+	oncePopper.Do(func() {
+		GlobalIDPool, initPoolErr = NewIDPool(db, idLen, poolCap, regenThresh)
+	})
+	return initPoolErr
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..2538c65
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module git.tijl.dev/tijl/shortify
+
+go 1.24.3
diff --git a/logger.go b/logger.go
new file mode 100644
index 0000000..e494edd
--- /dev/null
+++ b/logger.go
@@ -0,0 +1,84 @@
+package shortify
+
+import (
+	"database/sql"
+	"fmt"
+	"time"
+
+	_ "github.com/marcboeker/go-duckdb/v2"
+)
+
+// VisitLog is one redirect served for a short URL.
+type VisitLog struct {
+	ShortID string
+	LongURL string
+
+	IP string
+
+	UserAgent string
+	Referer   string
+	Language  string
+
+	Time time.Time
+}
+
+// LogVisit queues log for the analytics writer without blocking. When the
+// queue is full (for example because the writer could not start) the visit is
+// dropped so that redirects never stall on logging.
+func (s *Shortener) LogVisit(log VisitLog) {
+	select {
+	case s.logChan <- log:
+	default:
+		fmt.Println("Warning: visit log queue full, dropping visit for", log.ShortID)
+	}
+}
+
+// startLogging opens <DataFolder>/analytics.db with DuckDB, creates the
+// visits table if needed, and inserts every VisitLog received on logChan. It
+// returns early if the database cannot be prepared, and otherwise when
+// logChan is closed.
+func (s *Shortener) startLogging() {
+	db, err := sql.Open("duckdb", s.DataFolder+"/analytics.db")
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+	defer db.Close()
+
+	_, err = db.Exec(`
+CREATE SEQUENCE IF NOT EXISTS seq_visitid START 1;
+CREATE TABLE IF NOT EXISTS visits (
+    id INTEGER PRIMARY KEY,
+    shortid VARCHAR,
+    longurl VARCHAR,
+    ip VARCHAR,
+    useragent VARCHAR,
+    referer VARCHAR,
+    language VARCHAR,
+    time TIMESTAMP
+);
+	`)
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+
+	stmt, err := db.Prepare(`
+	INSERT INTO visits (id, shortid, longurl, ip, useragent, referer, language, time)
+	VALUES (nextval('seq_visitid'), ?, ?, ?, ?, ?, ?, ?)
+`)
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+	defer stmt.Close()
+
+	for log := range s.logChan {
+		_, err = stmt.Exec(log.ShortID, log.LongURL, log.IP, log.UserAgent, log.Referer, log.Language, log.Time)
+		if err != nil {
+			// One failed insert must not end logging for the rest of the
+			// process lifetime; report it and keep consuming.
+			fmt.Println(err)
+		}
+	}
+}
diff --git a/server.go b/server.go
new file mode 100644
index 0000000..44c31d2
--- /dev/null
+++ b/server.go
@@ -0,0 +1,153 @@
+package shortify
+
+import (
+	"errors"
+	"log"
+	"net"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/gofiber/fiber/v2"
+)
+
+// ServeSocket serves the write API on a unix socket at path and blocks.
+// POST /s with ?s=<id> stores the body under that ID; without the query
+// parameter (which is how Client.Shorten calls it) an ID is taken from the
+// pool. A listen or serve failure terminates the process via log.Fatal.
+func (s *Shortener) ServeSocket(path string) {
+	// Remove old socket if exists
+	socketPath := path
+	_ = os.Remove(socketPath)
+
+	// Create a Unix socket listener
+	ln, err := net.Listen("unix", socketPath)
+	if err != nil {
+		log.Fatalf("Failed to listen on unix socket: %v", err)
+	}
+
+	// Optionally set permissions so other processes can connect
+	_ = os.Chmod(socketPath, 0666)
+
+	app := fiber.New()
+
+	direct := s.HandlePOSTShortURLDirect()
+	pooled := s.HandlePOSTShortURL()
+	app.Post("/s", func(c *fiber.Ctx) error {
+		if c.Query("s") != "" {
+			return direct(c)
+		}
+		return pooled(c)
+	})
+
+	log.Fatal(app.Listener(ln))
+}
+
+// ServeHTTP serves GET /s/:id redirects on addr and blocks; a serve failure
+// terminates the process via log.Fatal.
+func (s *Shortener) ServeHTTP(addr string) {
+	app := fiber.New()
+
+	app.Get("/s/:id", s.HandleGETShortURL())
+
+	log.Fatal(app.Listen(addr))
+}
+
+// NewShortURL stores longUrl under a fresh ID from the pool and returns the
+// ID, or "" if no ID is available or the store fails.
+func (s *Shortener) NewShortURL(longUrl string) string {
+	shortID, err := s.idPool.PopID()
+	if err != nil {
+		return ""
+	}
+
+	if err := s.put(shortID, longUrl); err != nil {
+		return ""
+	}
+
+	return shortID
+}
+
+// HandleGETShortURL returns the handler for GET /s/:id. It looks the ID up,
+// queues a visit log entry and answers with a 302 redirect to the long URL;
+// unknown IDs get a 404.
+func (s *Shortener) HandleGETShortURL() func(*fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		// Fiber may hand out strings backed by buffers it reuses after the
+		// handler returns. Everything kept beyond the request (cache keys,
+		// queued log entries) is therefore cloned.
+		shortID := strings.Clone(c.Params("id"))
+
+		url, err := s.get(shortID)
+		if errors.Is(err, errNotFound) {
+			return fiber.ErrNotFound
+		}
+		if err != nil {
+			return err
+		}
+
+		s.LogVisit(VisitLog{
+			ShortID: shortID,
+			LongURL: url,
+
+			IP: strings.Clone(c.IP()),
+
+			// c.Get yields "" when a header is absent; indexing the header
+			// map directly panicked on requests without Accept-Language.
+			UserAgent: string(c.Context().UserAgent()),
+			Language:  strings.Clone(c.Get("Accept-Language")),
+			Referer:   string(c.Request().Header.Referer()),
+
+			Time: time.Now(),
+		})
+
+		c.Set("Referrer-Policy", "no-referrer")
+		return c.Redirect(url, fiber.StatusFound)
+	}
+}
+
+// HandlePOSTShortURL returns a handler that stores the request body as the
+// long URL under a fresh ID from the pool and responds with that ID. An empty
+// body is rejected with 400.
+func (s *Shortener) HandlePOSTShortURL() func(*fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		longUrl := string(c.Body())
+
+		if longUrl == "" {
+			return fiber.NewError(fiber.StatusBadRequest, "whut")
+		}
+
+		shortID, err := s.idPool.PopID()
+		if err != nil {
+			return err
+		}
+
+		if err := s.put(shortID, longUrl); err != nil {
+			return err
+		}
+
+		return c.SendString(shortID)
+	}
+}
+
+// HandlePOSTShortURLDirect returns a handler that stores the request body as
+// the long URL under the caller-chosen ID in the "s" query parameter,
+// replacing any existing mapping, and responds with that ID. A missing body
+// or ID is rejected with 400.
+func (s *Shortener) HandlePOSTShortURLDirect() func(*fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		longUrl := string(c.Body())
+		// Cloned because the ID is retained as a store key after the request.
+		shortID := strings.Clone(c.Query("s"))
+
+		if longUrl == "" || shortID == "" {
+			return fiber.NewError(fiber.StatusBadRequest, "no comment")
+		}
+
+		if err := s.put(shortID, longUrl); err != nil {
+			return err
+		}
+
+		return c.SendString(shortID)
+	}
+}
diff --git a/shortify.go b/shortify.go
new file mode 100644
index 0000000..24d73d5
--- /dev/null
+++ b/shortify.go
@@ -0,0 +1,78 @@
+package shortify
+
+import (
+	"os"
+	"sync"
+
+	"github.com/hashicorp/golang-lru"
+	bolt "go.etcd.io/bbolt"
+)
+
+// Shortener stores short-ID to long-URL mappings in bbolt, fronted by an LRU
+// cache and a map of not-yet-persisted writes, and logs visits.
+type Shortener struct {
+	DataFolder string
+
+	db          *bolt.DB
+	cache       *lru.Cache
+	accessCache *lru.Cache
+	logChan     chan VisitLog
+	idPool      *IDPool
+
+	writeChan chan [2]string // queue of writes: [shortID, longURL]
+	memStore  sync.Map       // written mappings until they are persisted
+}
+
+// Config holds the settings for NewShortener.
+type Config struct {
+	DataFolder    string // directory holding database.db and analytics.db
+	CacheSize     int    // entries in the URL cache; must be positive
+	AccessLogSize int    // entries in the access cache
+}
+
+// NewShortener creates cfg.DataFolder if needed, opens the bbolt database in
+// it, sets up the caches and the ID pool, and starts the visit-logging and
+// DB-writer goroutines.
+func NewShortener(cfg Config) (*Shortener, error) {
+	// A non-positive size makes lru.New fail and return a nil cache, which
+	// would panic on first use, so surface the error here.
+	urlCache, err := lru.New(cfg.CacheSize)
+	if err != nil {
+		return nil, err
+	}
+	// The access cache is not read anywhere yet, so a non-positive
+	// AccessLogSize (nil cache) stays accepted for compatibility.
+	accessCache, _ := lru.New(cfg.AccessLogSize)
+
+	if cfg.DataFolder != "" {
+		if err := os.MkdirAll(cfg.DataFolder, 0o755); err != nil {
+			return nil, err
+		}
+	}
+
+	db, err := bolt.Open(cfg.DataFolder+"/database.db", 0600, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	idPool, err := NewIDPool(db, 8, 10000, 2000)
+	if err != nil {
+		_ = db.Close() // do not leak the file lock on a failed start
+		return nil, err
+	}
+
+	s := &Shortener{
+		DataFolder:  cfg.DataFolder,
+		db:          db,
+		cache:       urlCache,
+		accessCache: accessCache,
+		logChan:     make(chan VisitLog, 1000),
+		writeChan:   make(chan [2]string, 1000),
+		idPool:      idPool,
+	}
+
+	go s.startLogging()
+	go s.asyncDBWriter()
+
+	return s, nil
+}
diff --git a/storage.go b/storage.go
new file mode 100644
index 0000000..ec77850
--- /dev/null
+++ b/storage.go
@@ -0,0 +1,88 @@
+package shortify
+
+import (
+	"errors"
+	"log"
+
+	"go.etcd.io/bbolt"
+)
+
+var bucketName = []byte("shorturls")
+
+// errNotFound is returned by get when no mapping exists for a short ID.
+var errNotFound = errors.New("not found")
+
+// get returns the long URL for shortID, checking pending writes, then the
+// LRU cache, then the database. It returns errNotFound for unknown IDs.
+func (s *Shortener) get(shortID string) (string, error) {
+	if val, ok := s.memStore.Load(shortID); ok {
+		s.cache.Add(shortID, val.(string))
+		return val.(string), nil
+	}
+
+	if val, ok := s.cache.Get(shortID); ok {
+		return val.(string), nil
+	}
+
+	var longURL string
+	err := s.db.View(func(tx *bbolt.Tx) error {
+		b := tx.Bucket(bucketName)
+		if b == nil {
+			return errNotFound
+		}
+		v := b.Get([]byte(shortID))
+		if v == nil {
+			return errNotFound
+		}
+		longURL = string(v) // copy: v is only valid inside the transaction
+		return nil
+	})
+	if err != nil {
+		return "", err
+	}
+
+	s.cache.Add(shortID, longURL)
+	return longURL, nil
+}
+
+// put makes shortID resolve to longURL immediately and queues the mapping
+// for persistence. It never blocks: when the write queue is full the mapping
+// stays in memory only. The returned error is currently always nil.
+func (s *Shortener) put(shortID, longURL string) error {
+	s.memStore.Store(shortID, longURL)
+	s.cache.Add(shortID, longURL)
+
+	// Queue write to DB (non-blocking)
+	select {
+	case s.writeChan <- [2]string{shortID, longURL}:
+	default:
+		log.Println("Warning: write queue full, short URL write may be dropped")
+	}
+
+	return nil
+}
+
+// asyncDBWriter persists queued mappings one transaction at a time until
+// writeChan is closed.
+func (s *Shortener) asyncDBWriter() {
+	for pair := range s.writeChan {
+		shortID, longURL := pair[0], pair[1]
+
+		err := s.db.Update(func(tx *bbolt.Tx) error {
+			b, err := tx.CreateBucketIfNotExists(bucketName)
+			if err != nil {
+				return err
+			}
+			return b.Put([]byte(shortID), []byte(longURL))
+		})
+		if err != nil {
+			log.Printf("DB write failed for %s: %v", shortID, err)
+			continue
+		}
+
+		// Persisted: release the in-memory copy so memStore does not grow
+		// without bound. CompareAndDelete leaves a newer value queued for the
+		// same ID in place.
+		s.memStore.CompareAndDelete(shortID, longURL)
+	}
+}