Commit c757113
Eric Bower
·
2026-05-12 00:06:58 -0400 EDT
parent cce1481
chore: better text file detection
1 files changed,
+64,
-6
M
main.go
M
main.go
+64,
-6
1@@ -9,7 +9,6 @@ import (
2 "html/template"
3 "io"
4 "log/slog"
5- "math"
6 "os"
7 "path/filepath"
8 "sort"
9@@ -284,13 +283,72 @@ func isText(s string) bool {
10 return true
11 }
12
// newSet returns a set holding every string in vals, represented as a map
// with empty-struct values. Repeated inputs collapse into one entry, and
// calling it with no arguments yields an empty, non-nil map.
func newSet(vals ...string) map[string]struct{} {
	set := make(map[string]struct{}, len(vals))
	for i := range vals {
		set[vals[i]] = struct{}{}
	}
	return set
}
21+
22+// knownBinaryExts lists extensions that are always binary regardless of content.
23+var knownBinaryExts = newSet(
24+ // images
25+ ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".webp", ".svg", ".tiff", ".tif", ".psd",
26+ // archives / compressed
27+ ".zip", ".tar", ".gz", ".bz2", ".xz", ".7z", ".rar", ".tgz", ".war", ".jar",
28+ // documents
29+ ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
30+ // executables / libraries
31+ ".exe", ".dll", ".so", ".dylib", ".a", ".o",
32+ // fonts
33+ ".ttf", ".otf", ".woff", ".woff2", ".eot",
34+ // audio / video
35+ ".mp3", ".mp4", ".avi", ".mov", ".wav", ".flac", ".ogg", ".webm",
36+ // data / serialized
37+ ".pb", ".msgpack", ".parquet", ".avro",
38+ // other
39+ ".class", ".pyc", ".pyo", ".wasm", ".db", ".sqlite", ".sqlite3",
40+)
41+
42+// knownTextExts lists extensions that are always text regardless of content.
43+var knownTextExts = newSet(
44+ // code
45+ ".go", ".py", ".js", ".ts", ".tsx", ".jsx", ".java", ".c", ".h", ".cpp", ".hpp", ".rs", ".rb", ".php", ".pl", ".sh", ".bash", ".zsh", ".fish", ".ps1",
46+ // markup / data
47+ ".html", ".htm", ".css", ".scss", ".less", ".xml", ".json", ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf",
48+ // docs
49+ ".md", ".markdown", ".txt", ".rst", ".tex", ".bib", ".csv", ".tsv",
50+ // config / build
51+ ".Dockerfile", ".dockerignore", ".gitignore", ".gitattributes", ".editorconfig",
52+ // other text
53+ ".diff", ".patch", ".log", ".sql", ".graphql", ".proto", ".makefile", ".cmake",
54+)
55+
56 // isTextFile reports whether the file has a known extension indicating
57 // a text file, or if a significant chunk of the specified file looks like
58 // correct UTF-8; that is, if it is likely that the file contains human-
59-// readable text.
60-func isTextFile(text string) bool {
61- num := math.Min(float64(len(text)), 1024)
62- return isText(text[0:int(num)])
63+// readable text. Extension check takes priority as a fast path.
64+func isTextFile(filename, text string) bool {
65+ ext := strings.ToLower(filepath.Ext(filename))
66+
67+ // fast path: known binary extension
68+ if _, ok := knownBinaryExts[ext]; ok {
69+ return false
70+ }
71+ // fast path: known text extension
72+ if _, ok := knownTextExts[ext]; ok {
73+ return true
74+ }
75+ // also check the full filename for extensionless known text files
76+ nameLower := strings.ToLower(filename)
77+ if _, ok := knownTextExts["."+nameLower]; ok {
78+ return true
79+ }
80+
81+ // fallback: inspect bytes
82+ return isText(text)
83 }
84
85 func toPretty(b int64) string {
86@@ -454,7 +512,7 @@ func (c *Config) writeHTMLTreeFile(pageData *PageData, treeItem *TreeItem) strin
87 bail(err)
88 str := string(b)
89
90- treeItem.IsTextFile = isTextFile(str)
91+ treeItem.IsTextFile = isTextFile(treeItem.Entry.Name(), str)
92
93 contents := "binary file, cannot display"
94 if treeItem.IsTextFile {