diff --git a/README.md b/README.md
index 836ba93..2ec092c 100644
--- a/README.md
+++ b/README.md
@@ -149,6 +149,16 @@ That’s the point.
 
 ---
 
+## related notes
+
+Find notes that read similarly to a given entry:
+
+```bash
+jot related 12
+```
+
+It prints up to 5 of the most similar notes, one per line, each prefixed with its note ID. A note's ID is its position in the journal, counting non-blank lines from 1.
+
 ## what should I write?
 
 If you’re unsure, start here:
diff --git a/main.go b/main.go
index e73e2cb..9a2af55 100644
--- a/main.go
+++ b/main.go
@@ -5,8 +5,12 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"math"
 	"os"
 	"path/filepath"
+	"sort"
+	"strconv"
 	"strings"
 	"time"
+	"unicode"
 )
@@ -38,7 +42,21 @@ func main() {
 		return
 	}
 
-	fmt.Fprintln(os.Stderr, "usage: jot [init|list|patterns]")
+	// related: print the notes most similar to the note with the given ID.
+	if len(args) == 2 && args[0] == "related" {
+		id, err := strconv.Atoi(args[1])
+		if err != nil || id <= 0 {
+			fmt.Fprintln(os.Stderr, "note-id must be a positive integer")
+			os.Exit(1)
+		}
+		if err := jotRelated(os.Stdout, id); err != nil {
+			fmt.Fprintln(os.Stderr, err)
+			os.Exit(1)
+		}
+		return
+	}
+
+	fmt.Fprintln(os.Stderr, "usage: jot [init|list|patterns|related <note-id>]")
 	os.Exit(1)
 }
 
@@ -179,3 +197,195 @@ func isTTY(w io.Writer) bool {
 	}
 	return (info.Mode() & os.ModeCharDevice) != 0
 }
+
+// note is a single journal entry as loaded from the journal file.
+type note struct {
+	ID   int    // 1-based position among the non-blank journal lines
+	Text string // entry text with any leading "[...]" prefix removed
+	Raw  string // the journal line exactly as read
+}
+
+// jotRelated writes to w the notes most similar to note id, best match first.
+//
+// Similarity is the cosine of TF-IDF vectors built over every note. At most
+// five notes are written, one per line: the ID, a tab, then the raw journal
+// line. Notes sharing no terms with the base note are omitted, so the output
+// may be empty. It returns an error when the journal cannot be loaded or is
+// empty, when id is out of range, when the base note has no tokens, or when
+// writing to w fails.
+func jotRelated(w io.Writer, id int) error {
+	notes, err := loadNotes()
+	if err != nil {
+		return err
+	}
+	if len(notes) == 0 {
+		return errors.New("no notes found")
+	}
+	if id < 1 || id > len(notes) {
+		return fmt.Errorf("note-id must be between 1 and %d", len(notes))
+	}
+
+	vectors := tfidfVectors(notes)
+	base := vectors[id-1]
+	baseNorm := vectorNorm(base)
+	if baseNorm == 0 {
+		return errors.New("note has no searchable content")
+	}
+
+	type scoredNote struct {
+		Note  note
+		Score float64
+	}
+
+	var scored []scoredNote
+	for i, n := range notes {
+		if n.ID == id {
+			continue
+		}
+		score := cosineSimilarity(base, baseNorm, vectors[i])
+		if score <= 0 {
+			continue
+		}
+		scored = append(scored, scoredNote{Note: n, Score: score})
+	}
+
+	// Highest score first; ties fall back to the lower ID so output is stable.
+	sort.Slice(scored, func(i, j int) bool {
+		if scored[i].Score == scored[j].Score {
+			return scored[i].Note.ID < scored[j].Note.ID
+		}
+		return scored[i].Score > scored[j].Score
+	})
+
+	limit := 5
+	if len(scored) < limit {
+		limit = len(scored)
+	}
+	for i := 0; i < limit; i++ {
+		if _, err := fmt.Fprintf(w, "%d\t%s\n", scored[i].Note.ID, scored[i].Note.Raw); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// loadNotes reads the journal and returns one note per non-blank line,
+// numbering them from 1 in file order.
+func loadNotes() ([]note, error) {
+	journalPath, err := ensureJournal()
+	if err != nil {
+		return nil, err
+	}
+
+	file, err := os.Open(journalPath)
+	if err != nil {
+		return nil, err
+	}
+	defer file.Close()
+
+	var notes []note
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		line := scanner.Text()
+		trimmed := strings.TrimSpace(line)
+		if trimmed == "" {
+			continue
+		}
+		text := extractNoteText(line)
+		notes = append(notes, note{ID: len(notes) + 1, Text: text, Raw: line})
+	}
+	if err := scanner.Err(); err != nil {
+		return nil, err
+	}
+
+	return notes, nil
+}
+
+// extractNoteText returns line without its leading "[...]" prefix (the
+// timestamp on journal entries), trimmed of surrounding whitespace. Lines
+// without such a prefix are only trimmed.
+func extractNoteText(line string) string {
+	if strings.HasPrefix(line, "[") {
+		if end := strings.IndexByte(line, ']'); end > 0 {
+			return strings.TrimSpace(line[end+1:])
+		}
+	}
+	return strings.TrimSpace(line)
+}
+
+// tfidfVectors returns one sparse TF-IDF vector per note, index-aligned with
+// notes. Term frequency is the term's share of the note's tokens; inverse
+// document frequency is the smoothed ln((1+N)/(1+df)) + 1. Notes without any
+// tokens get an empty vector.
+func tfidfVectors(notes []note) []map[string]float64 {
+	termCounts := make([]map[string]int, len(notes))
+	docFreq := make(map[string]int)
+
+	for i, n := range notes {
+		tokens := tokenize(n.Text)
+		counts := make(map[string]int)
+		for _, token := range tokens {
+			counts[token]++
+		}
+		termCounts[i] = counts
+		for term := range counts {
+			docFreq[term]++
+		}
+	}
+
+	N := float64(len(notes))
+	vectors := make([]map[string]float64, len(notes))
+	for i, counts := range termCounts {
+		total := 0
+		for _, count := range counts {
+			total += count
+		}
+		vector := make(map[string]float64)
+		if total == 0 {
+			vectors[i] = vector
+			continue
+		}
+		for term, count := range counts {
+			tf := float64(count) / float64(total)
+			idf := math.Log((1+N)/(1+float64(docFreq[term]))) + 1
+			vector[term] = tf * idf
+		}
+		vectors[i] = vector
+	}
+
+	return vectors
+}
+
+// tokenize lower-cases text and splits it into runs of letters and digits.
+// Unicode letters and digits count, so non-ASCII notes are searchable too.
+func tokenize(text string) []string {
+	return strings.FieldsFunc(strings.ToLower(text), func(r rune) bool {
+		return !unicode.IsLetter(r) && !unicode.IsDigit(r)
+	})
+}
+
+// vectorNorm returns the Euclidean length of a sparse vector.
+func vectorNorm(vector map[string]float64) float64 {
+	var sum float64
+	for _, v := range vector {
+		sum += v * v
+	}
+	return math.Sqrt(sum)
+}
+
+// cosineSimilarity returns the cosine of the angle between base and other.
+// baseNorm is the precomputed norm of base; the result is 0 when either
+// vector has zero length.
+func cosineSimilarity(base map[string]float64, baseNorm float64, other map[string]float64) float64 {
+	otherNorm := vectorNorm(other)
+	if baseNorm == 0 || otherNorm == 0 {
+		return 0
+	}
+	var dot float64
+	for term, weight := range base {
+		if otherWeight, ok := other[term]; ok {
+			dot += weight * otherWeight
+		}
+	}
+	return dot / (baseNorm * otherNorm)
+}
diff --git a/main_test.go b/main_test.go
index 05850a6..0ef8697 100644
--- a/main_test.go
+++ b/main_test.go
@@ -2,6 +2,7 @@ package main
 
 import (
 	"bytes"
+	"io"
 	"os"
 	"runtime"
 	"strings"
@@ -123,3 +124,65 @@ func TestJotInitAppendsWithTimestamp(t *testing.T) {
 		t.Fatalf("expected entry %q, got %q", expectedEntry, string(data))
 	}
 }
+
+// TestJotRelatedOutputsTopMatches checks that an identical note ranks first.
+func TestJotRelatedOutputsTopMatches(t *testing.T) {
+	home := withTempHome(t)
+	journalDir, journalPath := journalPaths(home)
+
+	if err := os.MkdirAll(journalDir, 0o700); err != nil {
+		t.Fatalf("mkdir failed: %v", err)
+	}
+
+	entries := strings.Join([]string{
+		"[2024-01-01 10:00] go unit tests tfidf",
+		"[2024-01-02 10:00] go unit tests tfidf",
+		"[2024-01-03 10:00] cooking pasta recipe",
+		"[2024-01-04 10:00] go concurrency patterns",
+		"[2024-01-05 10:00] gardening tips",
+		"[2024-01-06 10:00] go tests table driven",
+	}, "\n") + "\n"
+
+	if err := os.WriteFile(journalPath, []byte(entries), 0o600); err != nil {
+		t.Fatalf("write failed: %v", err)
+	}
+
+	var out bytes.Buffer
+	if err := jotRelated(&out, 1); err != nil {
+		t.Fatalf("jotRelated returned error: %v", err)
+	}
+
+	// strings.Split never returns an empty slice, so check the text itself.
+	output := strings.TrimSpace(out.String())
+	if output == "" {
+		t.Fatalf("expected related notes, got none")
+	}
+	lines := strings.Split(output, "\n")
+
+	if !strings.HasPrefix(lines[0], "2\t") {
+		t.Fatalf("expected note 2 to be top match, got %q", lines[0])
+	}
+}
+
+// TestJotRelatedRejectsOutOfRangeID checks that an ID past the last note errors.
+func TestJotRelatedRejectsOutOfRangeID(t *testing.T) {
+	home := withTempHome(t)
+	journalDir, journalPath := journalPaths(home)
+
+	if err := os.MkdirAll(journalDir, 0o700); err != nil {
+		t.Fatalf("mkdir failed: %v", err)
+	}
+
+	entries := strings.Join([]string{
+		"[2024-01-01 10:00] one",
+		"[2024-01-02 10:00] two",
+	}, "\n") + "\n"
+
+	if err := os.WriteFile(journalPath, []byte(entries), 0o600); err != nil {
+		t.Fatalf("write failed: %v", err)
+	}
+
+	if err := jotRelated(io.Discard, 3); err == nil {
+		t.Fatalf("expected out of range error, got nil")
+	}
+}