From 0c21dbbf5afdf69c2bcf5e867363e3fd1031a444 Mon Sep 17 00:00:00 2001 From: ixuer Date: Fri, 13 Sep 2024 14:54:49 +0800 Subject: [PATCH 1/4] feat: Add lz4 support --- fixtures/sample.lz4 | Bin 0 -> 15 bytes match_test.go | 1 + matchers/archive.go | 3 +++ 3 files changed, 4 insertions(+) create mode 100644 fixtures/sample.lz4 diff --git a/fixtures/sample.lz4 b/fixtures/sample.lz4 new file mode 100644 index 0000000000000000000000000000000000000000..fe874c356d1572af5c8dd0acf0c4e7bee3cab5c1 GIT binary patch literal 15 UcmZQk@|8$&Sk3?htg&a902ZJF2LJ#7 literal 0 HcmV?d00001 diff --git a/match_test.go b/match_test.go index 946bb25..34baa76 100644 --- a/match_test.go +++ b/match_test.go @@ -54,6 +54,7 @@ func TestMatchFile(t *testing.T) { {"zst"}, {"exr"}, {"avif"}, + {"lz4"}, } for _, test := range cases { diff --git a/matchers/archive.go b/matchers/archive.go index dd892ce..b07415e 100644 --- a/matchers/archive.go +++ b/matchers/archive.go @@ -31,6 +31,7 @@ var ( TypeAr = newType("ar", "application/x-unix-archive") TypeZ = newType("Z", "application/x-compress") TypeLz = newType("lz", "application/x-lzip") + TypeLz4 = newType("lz4", "application/x-lz4") TypeRpm = newType("rpm", "application/x-rpm") TypeElf = newType("elf", "application/x-executable") TypeDcm = newType("dcm", "application/dicom") @@ -62,6 +63,7 @@ var Archive = Map{ TypeAr: bytePrefixMatcher(arMagic), TypeZ: Z, TypeLz: bytePrefixMatcher(lzMagic), + TypeLz4: bytePrefixMatcher(lzMagic4), TypeRpm: Rpm, TypeElf: Elf, TypeDcm: Dcm, @@ -95,6 +97,7 @@ var ( arMagic = []byte{0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E} zstdMagic = []byte{0x28, 0xB5, 0x2F, 0xFD} lzMagic = []byte{0x4C, 0x5A, 0x49, 0x50} + lzMagic4 = []byte{0x04, 0x22, 0x4D, 0x18} // 04 22 4D 18 ) func bytePrefixMatcher(magicPattern []byte) Matcher { From 1513402d9def62014a356616c511082293fd6582 Mon Sep 17 00:00:00 2001 From: ixuer Date: Fri, 13 Sep 2024 15:26:36 +0800 Subject: [PATCH 2/4] doc: Added README in simplified Chinese --- README.md | 2 + README_zh_CN.md | 297 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 299 insertions(+) create mode 100644 README_zh_CN.md diff --git a/README.md b/README.md index 5139213..464f099 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ +English | [简体中文](./README_zh_CN.md) # filetype [![GoDoc](https://godoc.org/github.com/h2non/filetype?status.svg)](https://godoc.org/github.com/h2non/filetype) [![Go Version](https://img.shields.io/badge/go-v1.0+-green.svg?style=flat)](https://github.com/h2non/gentleman) Small and dependency free [Go](https://golang.org) package to infer file and MIME type checking the [magic numbers]() signature. @@ -250,6 +251,7 @@ func main() { - **ar** - `application/x-unix-archive` - **Z** - `application/x-compress` - **lz** - `application/x-lzip` +- **lz4** - `application/x-lz4` - **rpm** - `application/x-rpm` - **elf** - `application/x-executable` - **dcm** - `application/dicom` diff --git a/README_zh_CN.md b/README_zh_CN.md new file mode 100644 index 0000000..de5bfec --- /dev/null +++ b/README_zh_CN.md @@ -0,0 +1,297 @@ +[English](./README.md) | 简体中文 +# filetype [![GoDoc](https://godoc.org/github.com/h2non/filetype?status.svg)](https://godoc.org/github.com/h2non/filetype) [![Go Version](https://img.shields.io/badge/go-v1.0+-green.svg?style=flat)](https://github.com/h2non/gentleman) + +小型且无依赖的 [Go](https://golang.org) 包,用于通过检查[魔数](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files)签名推断文件类型和 MIME 类型。 + +如需检查 SVG 文件类型,请参阅 [go-is-svg](https://github.com/h2non/go-is-svg) 包。Python 版本: [filetype.py](https://github.com/h2non/filetype.py)。 + +## 功能特性 + +- 支持[广泛的](#supported-types)文件类型 +- 提供文件扩展名和正确的 MIME 类型 +- 通过扩展名或 MIME 类型进行文件识别 +- 通过类别(图像、视频、音频等)进行文件识别 +- 提供大量辅助工具和文件匹配快捷方式 +- [可扩展](#add-additional-file-type-matchers):可以添加自定义的新类型和匹配器 +- 简单且语义化的 API +- [速度极快](#benchmarks),即使处理大型文件也不例外 +- 只需要前 262 字节的文件头,因此你可以[传递一个切片](#file-header) +- 无依赖(仅需 Go 代码,无需 C 语言编译) +- 跨平台文件识别 + +## 安装 + +```bash +go get github.com/h2non/filetype +``` + +## API + +请参考[Godoc](https://godoc.org/github.com/h2non/filetype) . + +### 关联子包 + +- [`github.com/h2non/filetype/types`](https://godoc.org/github.com/h2non/filetype/types) +- [`github.com/h2non/filetype/matchers`](https://godoc.org/github.com/h2non/filetype/matchers) + +## 示例 + +#### 简单的文件类型检查 + +```go +package main + +import ( + "fmt" + "io/ioutil" + + "github.com/h2non/filetype" +) + +func main() { + buf, _ := ioutil.ReadFile("sample.jpg") + + kind, _ := filetype.Match(buf) + if kind == filetype.Unknown { + fmt.Println("Unknown file type") + return + } + + fmt.Printf("File type: %s. MIME: %s\n", kind.Extension, kind.MIME.Value) +} +``` + +#### 检查类型类别 + +```go +package main + +import ( + "fmt" + "io/ioutil" + + "github.com/h2non/filetype" +) + +func main() { + buf, _ := ioutil.ReadFile("sample.jpg") + + if filetype.IsImage(buf) { + fmt.Println("File is an image") + } else { + fmt.Println("Not an image") + } +} +``` + +#### 类型支持 + +```go +package main + +import ( + "fmt" + + "github.com/h2non/filetype" +) + +func main() { + // Check if file is supported by extension + if filetype.IsSupported("jpg") { + fmt.Println("Extension supported") + } else { + fmt.Println("Extension not supported") + } + + // Check if file is supported by extension + if filetype.IsMIMESupported("image/jpeg") { + fmt.Println("MIME type supported") + } else { + fmt.Println("MIME type not supported") + } +} +``` + +#### 文件头 + +```go +package main + +import ( + "fmt" + "os" + + "github.com/h2non/filetype" +) + +func main() { + // Open a file descriptor + file, _ := os.Open("movie.mp4") + + // We only have to pass the file header = first 261 bytes + head := make([]byte, 261) + file.Read(head) + + if filetype.IsImage(head) { + fmt.Println("File is an image") + } else { + fmt.Println("Not an image") + } +} +``` + +#### 添加额外的文件类型匹配器 + +```go +package main + +import ( + "fmt" + + "github.com/h2non/filetype" +) + +var fooType = filetype.NewType("foo", "foo/foo") + +func fooMatcher(buf []byte) bool { + return len(buf) > 1 && buf[0] == 0x01 && buf[1] == 0x02 +} + +func main() { + // Register the new matcher and its type + filetype.AddMatcher(fooType, fooMatcher) + + // Check if the new type is supported by extension + if filetype.IsSupported("foo") { + fmt.Println("New supported type: foo") + } + + // Check if the new type is supported by MIME + if filetype.IsMIMESupported("foo/foo") { + fmt.Println("New supported MIME type: foo/foo") + } + + // Try to match the file + fooFile := []byte{0x01, 0x02} + kind, _ := filetype.Match(fooFile) + if kind == filetype.Unknown { + fmt.Println("Unknown file type") + } else { + fmt.Printf("File type matched: %s\n", kind.Extension) + } +} +``` + +## 已支持的类型 + +#### Image + +- **jpg** - `image/jpeg` +- **png** - `image/png` +- **gif** - `image/gif` +- **webp** - `image/webp` +- **cr2** - `image/x-canon-cr2` +- **tif** - `image/tiff` +- **bmp** - `image/bmp` +- **heif** - `image/heif` +- **jxr** - `image/vnd.ms-photo` +- **psd** - `image/vnd.adobe.photoshop` +- **ico** - `image/vnd.microsoft.icon` +- **dwg** - `image/vnd.dwg` +- **avif** - `image/avif` + +#### Video + +- **mp4** - `video/mp4` +- **m4v** - `video/x-m4v` +- **mkv** - `video/x-matroska` +- **webm** - `video/webm` +- **mov** - `video/quicktime` +- **avi** - `video/x-msvideo` +- **wmv** - `video/x-ms-wmv` +- **mpg** - `video/mpeg` +- **flv** - `video/x-flv` +- **3gp** - `video/3gpp` + +#### Audio + +- **mid** - `audio/midi` +- **mp3** - `audio/mpeg` +- **m4a** - `audio/mp4` +- **ogg** - `audio/ogg` +- **flac** - `audio/x-flac` +- **wav** - `audio/x-wav` +- **amr** - `audio/amr` +- **aac** - `audio/aac` +- **aiff** - `audio/x-aiff` + +#### Archive + +- **epub** - `application/epub+zip` +- **zip** - `application/zip` +- **tar** - `application/x-tar` +- **rar** - `application/vnd.rar` +- **gz** - `application/gzip` +- **bz2** - `application/x-bzip2` +- **7z** - `application/x-7z-compressed` +- **xz** - `application/x-xz` +- **zstd** - `application/zstd` +- **pdf** - `application/pdf` +- **exe** - `application/vnd.microsoft.portable-executable` +- **swf** - `application/x-shockwave-flash` +- **rtf** - `application/rtf` +- **iso** - `application/x-iso9660-image` +- **eot** - `application/octet-stream` +- **ps** - `application/postscript` +- **sqlite** - `application/vnd.sqlite3` +- **nes** - `application/x-nintendo-nes-rom` +- **crx** - `application/x-google-chrome-extension` +- **cab** - `application/vnd.ms-cab-compressed` +- **deb** - `application/vnd.debian.binary-package` +- **ar** - `application/x-unix-archive` +- **Z** - `application/x-compress` +- **lz** - `application/x-lzip` +- **lz4** - `application/x-lz4` +- **rpm** - `application/x-rpm` +- **elf** - `application/x-executable` +- **dcm** - `application/dicom` + +#### Documents + +- **doc** - `application/msword` +- **docx** - `application/vnd.openxmlformats-officedocument.wordprocessingml.document` +- **xls** - `application/vnd.ms-excel` +- **xlsx** - `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet` +- **ppt** - `application/vnd.ms-powerpoint` +- **pptx** - `application/vnd.openxmlformats-officedocument.presentationml.presentation` + +#### Font + +- **woff** - `application/font-woff` +- **woff2** - `application/font-woff` +- **ttf** - `application/font-sfnt` +- **otf** - `application/font-sfnt` + +#### Application + +- **wasm** - `application/wasm` +- **dex** - `application/vnd.android.dex` +- **dey** - `application/vnd.android.dey` + +## 性能基准 + +通过 [real files](https://github.com/h2non/filetype/tree/master/fixtures) 测量. + +运行环境: OSX x64 i7 2.7 Ghz + +```bash +BenchmarkMatchTar-8 1000000 1083 ns/op +BenchmarkMatchZip-8 1000000 1162 ns/op +BenchmarkMatchJpeg-8 1000000 1280 ns/op +BenchmarkMatchGif-8 1000000 1315 ns/op +BenchmarkMatchPng-8 1000000 1121 ns/op +``` + +## License + +MIT - Tomas Aparicio From d063514e9917b0ecd5d0b7f6392f24f05b434a4d Mon Sep 17 00:00:00 2001 From: Tom <63402+h2non@users.noreply.github.com> Date: Sat, 15 Mar 2025 21:16:20 +0100 Subject: [PATCH 3/4] Delete README_zh_CN.md --- README_zh_CN.md | 297 ------------------------------------------------ 1 file changed, 297 deletions(-) delete mode 100644 README_zh_CN.md diff --git a/README_zh_CN.md b/README_zh_CN.md deleted file mode 100644 index de5bfec..0000000 --- a/README_zh_CN.md +++ /dev/null @@ -1,297 +0,0 @@ -[English](./README.md) | 简体中文 -# filetype [![GoDoc](https://godoc.org/github.com/h2non/filetype?status.svg)](https://godoc.org/github.com/h2non/filetype) [![Go Version](https://img.shields.io/badge/go-v1.0+-green.svg?style=flat)](https://github.com/h2non/gentleman) - -小型且无依赖的 [Go](https://golang.org) 包,用于通过检查[魔数](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files)签名推断文件类型和 MIME 类型。 - -如需检查 SVG 文件类型,请参阅 [go-is-svg](https://github.com/h2non/go-is-svg) 包。Python 版本: [filetype.py](https://github.com/h2non/filetype.py)。 - -## 功能特性 - -- 支持[广泛的](#supported-types)文件类型 -- 提供文件扩展名和正确的 MIME 类型 -- 通过扩展名或 MIME 类型进行文件识别 -- 通过类别(图像、视频、音频等)进行文件识别 -- 提供大量辅助工具和文件匹配快捷方式 -- [可扩展](#add-additional-file-type-matchers):可以添加自定义的新类型和匹配器 -- 简单且语义化的 API -- [速度极快](#benchmarks),即使处理大型文件也不例外 -- 只需要前 262 字节的文件头,因此你可以[传递一个切片](#file-header) -- 无依赖(仅需 Go 代码,无需 C 语言编译) -- 跨平台文件识别 - -## 安装 - -```bash -go get github.com/h2non/filetype -``` - -## API - -请参考[Godoc](https://godoc.org/github.com/h2non/filetype) . - -### 关联子包 - -- [`github.com/h2non/filetype/types`](https://godoc.org/github.com/h2non/filetype/types) -- [`github.com/h2non/filetype/matchers`](https://godoc.org/github.com/h2non/filetype/matchers) - -## 示例 - -#### 简单的文件类型检查 - -```go -package main - -import ( - "fmt" - "io/ioutil" - - "github.com/h2non/filetype" -) - -func main() { - buf, _ := ioutil.ReadFile("sample.jpg") - - kind, _ := filetype.Match(buf) - if kind == filetype.Unknown { - fmt.Println("Unknown file type") - return - } - - fmt.Printf("File type: %s. MIME: %s\n", kind.Extension, kind.MIME.Value) -} -``` - -#### 检查类型类别 - -```go -package main - -import ( - "fmt" - "io/ioutil" - - "github.com/h2non/filetype" -) - -func main() { - buf, _ := ioutil.ReadFile("sample.jpg") - - if filetype.IsImage(buf) { - fmt.Println("File is an image") - } else { - fmt.Println("Not an image") - } -} -``` - -#### 类型支持 - -```go -package main - -import ( - "fmt" - - "github.com/h2non/filetype" -) - -func main() { - // Check if file is supported by extension - if filetype.IsSupported("jpg") { - fmt.Println("Extension supported") - } else { - fmt.Println("Extension not supported") - } - - // Check if file is supported by extension - if filetype.IsMIMESupported("image/jpeg") { - fmt.Println("MIME type supported") - } else { - fmt.Println("MIME type not supported") - } -} -``` - -#### 文件头 - -```go -package main - -import ( - "fmt" - "os" - - "github.com/h2non/filetype" -) - -func main() { - // Open a file descriptor - file, _ := os.Open("movie.mp4") - - // We only have to pass the file header = first 261 bytes - head := make([]byte, 261) - file.Read(head) - - if filetype.IsImage(head) { - fmt.Println("File is an image") - } else { - fmt.Println("Not an image") - } -} -``` - -#### 添加额外的文件类型匹配器 - -```go -package main - -import ( - "fmt" - - "github.com/h2non/filetype" -) - -var fooType = filetype.NewType("foo", "foo/foo") - -func fooMatcher(buf []byte) bool { - return len(buf) > 1 && buf[0] == 0x01 && buf[1] == 0x02 -} - -func main() { - // Register the new matcher and its type - filetype.AddMatcher(fooType, fooMatcher) - - // Check if the new type is supported by extension - if filetype.IsSupported("foo") { - fmt.Println("New supported type: foo") - } - - // Check if the new type is supported by MIME - if filetype.IsMIMESupported("foo/foo") { - fmt.Println("New supported MIME type: foo/foo") - } - - // Try to match the file - fooFile := []byte{0x01, 0x02} - kind, _ := filetype.Match(fooFile) - if kind == filetype.Unknown { - fmt.Println("Unknown file type") - } else { - fmt.Printf("File type matched: %s\n", kind.Extension) - } -} -``` - -## 已支持的类型 - -#### Image - -- **jpg** - `image/jpeg` -- **png** - `image/png` -- **gif** - `image/gif` -- **webp** - `image/webp` -- **cr2** - `image/x-canon-cr2` -- **tif** - `image/tiff` -- **bmp** - `image/bmp` -- **heif** - `image/heif` -- **jxr** - `image/vnd.ms-photo` -- **psd** - `image/vnd.adobe.photoshop` -- **ico** - `image/vnd.microsoft.icon` -- **dwg** - `image/vnd.dwg` -- **avif** - `image/avif` - -#### Video - -- **mp4** - `video/mp4` -- **m4v** - `video/x-m4v` -- **mkv** - `video/x-matroska` -- **webm** - `video/webm` -- **mov** - `video/quicktime` -- **avi** - `video/x-msvideo` -- **wmv** - `video/x-ms-wmv` -- **mpg** - `video/mpeg` -- **flv** - `video/x-flv` -- **3gp** - `video/3gpp` - -#### Audio - -- **mid** - `audio/midi` -- **mp3** - `audio/mpeg` -- **m4a** - `audio/mp4` -- **ogg** - `audio/ogg` -- **flac** - `audio/x-flac` -- **wav** - `audio/x-wav` -- **amr** - `audio/amr` -- **aac** - `audio/aac` -- **aiff** - `audio/x-aiff` - -#### Archive - -- **epub** - `application/epub+zip` -- **zip** - `application/zip` -- **tar** - `application/x-tar` -- **rar** - `application/vnd.rar` -- **gz** - `application/gzip` -- **bz2** - `application/x-bzip2` -- **7z** - `application/x-7z-compressed` -- **xz** - `application/x-xz` -- **zstd** - `application/zstd` -- **pdf** - `application/pdf` -- **exe** - `application/vnd.microsoft.portable-executable` -- **swf** - `application/x-shockwave-flash` -- **rtf** - `application/rtf` -- **iso** - `application/x-iso9660-image` -- **eot** - `application/octet-stream` -- **ps** - `application/postscript` -- **sqlite** - `application/vnd.sqlite3` -- **nes** - `application/x-nintendo-nes-rom` -- **crx** - `application/x-google-chrome-extension` -- **cab** - `application/vnd.ms-cab-compressed` -- **deb** - `application/vnd.debian.binary-package` -- **ar** - `application/x-unix-archive` -- **Z** - `application/x-compress` -- **lz** - `application/x-lzip` -- **lz4** - `application/x-lz4` -- **rpm** - `application/x-rpm` -- **elf** - `application/x-executable` -- **dcm** - `application/dicom` - -#### Documents - -- **doc** - `application/msword` -- **docx** - `application/vnd.openxmlformats-officedocument.wordprocessingml.document` -- **xls** - `application/vnd.ms-excel` -- **xlsx** - `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet` -- **ppt** - `application/vnd.ms-powerpoint` -- **pptx** - `application/vnd.openxmlformats-officedocument.presentationml.presentation` - -#### Font - -- **woff** - `application/font-woff` -- **woff2** - `application/font-woff` -- **ttf** - `application/font-sfnt` -- **otf** - `application/font-sfnt` - -#### Application - -- **wasm** - `application/wasm` -- **dex** - `application/vnd.android.dex` -- **dey** - `application/vnd.android.dey` - -## 性能基准 - -通过 [real files](https://github.com/h2non/filetype/tree/master/fixtures) 测量. - -运行环境: OSX x64 i7 2.7 Ghz - -```bash -BenchmarkMatchTar-8 1000000 1083 ns/op -BenchmarkMatchZip-8 1000000 1162 ns/op -BenchmarkMatchJpeg-8 1000000 1280 ns/op -BenchmarkMatchGif-8 1000000 1315 ns/op -BenchmarkMatchPng-8 1000000 1121 ns/op -``` - -## License - -MIT - Tomas Aparicio From 7128f22b95672862103866d69b9843a3378e3d38 Mon Sep 17 00:00:00 2001 From: Tom <63402+h2non@users.noreply.github.com> Date: Sat, 15 Mar 2025 21:16:46 +0100 Subject: [PATCH 4/4] Update README.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 464f099..d13705c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ -English | [简体中文](./README_zh_CN.md) # filetype [![GoDoc](https://godoc.org/github.com/h2non/filetype?status.svg)](https://godoc.org/github.com/h2non/filetype) [![Go Version](https://img.shields.io/badge/go-v1.0+-green.svg?style=flat)](https://github.com/h2non/gentleman) Small and dependency free [Go](https://golang.org) package to infer file and MIME type checking the [magic numbers]() signature.