Skip to content

Add some file types from the Go version #41

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ Image
- **psd** - ``image/vnd.adobe.photoshop``
- **ico** - ``image/x-icon``
- **heic** - ``image/heic``
- **dwg** - ``image/vnd.dwg``

Video
^^^^^
Expand All @@ -87,6 +88,7 @@ Video
- **wmv** - ``video/x-ms-wmv``
- **mpg** - ``video/mpeg``
- **flv** - ``video/x-flv``
- **3gp** - ``video/3gpp``

Audio
^^^^^
Expand All @@ -98,6 +100,13 @@ Audio
- **flac** - ``audio/x-flac``
- **wav** - ``audio/x-wav``
- **amr** - ``audio/amr``
- **aac** - ``audio/aac``

Application
^^^^^^^^^^^

- **wasm** - ``application/wasm``


Archive
^^^^^^^
Expand Down Expand Up @@ -126,6 +135,11 @@ Archive
- **Z** - ``application/x-compress``
- **lz** - ``application/x-lzip``

- **rpm** - ``application/x-rpm``
- **elf** - ``application/x-executable``
- **dcm** - ``application/dicom``
- **iso** - ``application/x-iso9660-image``

Font
^^^^

Expand All @@ -134,6 +148,13 @@ Font
- **ttf** - ``application/font-sfnt``
- **otf** - ``application/font-sfnt``

Document
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Group Microsoft Office pre-2007 documents as application/x-ole-storage, as these 3 types have the same file signature. We can determine the file type by the filename extension.

^^^^^^^^
- **doc** - ``application/msword``
- **xls** - ``application/vnd.ms-excel``
- **ppt** - ``application/vnd.ms-powerpoint``


.. _Python: http://python.org
.. _magic numbers: https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files
.. _filetype: https://github.com/h2non/filetype
Expand Down
32 changes: 32 additions & 0 deletions filetype/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,35 @@ def is_font(obj):
TypeError: if obj is not a supported type.
"""
return match.font(obj) is not None


def is_application(obj):
    """
    Checks if a given input is a supported application type.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        True if obj is a supported application type. Otherwise False.

    Raises:
        TypeError: if obj is not a supported type.
    """
    # match.application() yields None when no application matcher
    # recognises the input.
    return match.application(obj) is not None


def is_document(obj):
    """
    Checks if a given input is a supported document type.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        True if obj is a supported document type. Otherwise False.

    Raises:
        TypeError: if obj is not a supported type.
    """
    # match.document() yields None when no document matcher
    # recognises the input.
    return match.document(obj) is not None
36 changes: 36 additions & 0 deletions filetype/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

from __future__ import absolute_import

from .types import APPLICATION as application_matchers
from .types import ARCHIVE as archive_matchers
from .types import AUDIO as audio_matchers
from .types import DOCUMENT as document_matchers
from .types import FONT as font_matchers
from .types import IMAGE as image_matchers
from .types import VIDEO as video_matchers
Expand Down Expand Up @@ -117,3 +119,37 @@ def archive(obj):
TypeError: if obj is not a supported type.
"""
return match(obj, archive_matchers)


def application(obj):
    """
    Runs the available application type matchers
    against the given input.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        Type instance if matches. Otherwise None.

    Raises:
        TypeError: if obj is not a supported type.
    """
    matched = match(obj, application_matchers)
    return matched


def document(obj):
    """
    Runs the available document type matchers
    against the given input.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        Type instance if matches. Otherwise None.

    Raises:
        TypeError: if obj is not a supported type.
    """
    matched = match(obj, document_matchers)
    return matched
29 changes: 25 additions & 4 deletions filetype/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

from __future__ import absolute_import

from . import application
from . import archive
from . import audio
from . import document
from . import font
from . import image
from . import video
Expand All @@ -23,20 +25,21 @@
image.Psd(),
image.Ico(),
image.Heic(),
image.Dcm(),
image.Dwg(),
)

# Supported video types
VIDEO = (
video.Mp4(),
video.M4v(),
video.Mkv(),
video.Webm(),
video.Mov(),
video.Avi(),
video.Wmv(),
video.Mpeg(),
video.Webm(),
video.Flv(),
video.Mp4(),
video.Match3gp(),
)

# Supported audio types
Expand All @@ -48,6 +51,7 @@
audio.Flac(),
audio.Wav(),
audio.Amr(),
audio.Aac(),
)

# Supported font types
Expand Down Expand Up @@ -77,7 +81,24 @@
archive.Ar(),
archive.Z(),
archive.Lz(),
archive.Rpm(),
archive.Elf(),
archive.Dcm(),
archive.Iso(),
)

# Supported application types
APPLICATION = (
application.Wasm(),
)

# Supported document types
DOCUMENT = (
document.Doc(),
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Doc type will also be valid for xls and ppt because they share the same file signature, so there is no need to define the others.

document.Xls(),
document.Ppt(),
)


# Expose supported type matchers
TYPES = list(VIDEO + IMAGE + AUDIO + FONT + ARCHIVE)
TYPES = list(VIDEO + IMAGE + AUDIO + FONT + ARCHIVE + APPLICATION + DOCUMENT)
34 changes: 34 additions & 0 deletions filetype/types/application.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-

from __future__ import absolute_import

from .base import Type


class Wasm(Type):
    """
    Implements the WASM WebAssembly 1.0 type matcher.

    A WASM binary starts with the 4-byte magic 0x00 'a' 's' 'm',
    followed by a 4-byte version field (01 00 00 00 for version 1).
    http://webassembly.github.io/spec/core/binary/modules.html#binary-magic
    """

    MIME = 'application/wasm'
    EXTENSION = 'wasm'

    def __init__(self):
        super(Wasm, self).__init__(
            mime=Wasm.MIME,
            extension=Wasm.EXTENSION
        )

    def match(self, buf):
        """
        Checks whether buf begins with the WASM magic and version 1.

        Args:
            buf: indexable sequence of byte values (e.g. bytearray).

        Returns:
            True if the first 8 bytes are the WASM 1.0 preamble.
            Otherwise False.
        """
        # Only indices 0..7 are inspected, so 8 bytes are enough. The
        # smallest valid module is exactly this 8-byte preamble, which
        # a strict "> 8" length check would wrongly reject.
        return (len(buf) >= 8 and
                buf[0] == 0x00 and
                buf[1] == 0x61 and
                buf[2] == 0x73 and
                buf[3] == 0x6D and
                buf[4] == 0x01 and
                buf[5] == 0x00 and
                buf[6] == 0x00 and
                buf[7] == 0x00)
85 changes: 85 additions & 0 deletions filetype/types/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,3 +513,88 @@ def match(self, buf):
buf[1] == 0x5A and
buf[2] == 0x49 and
buf[3] == 0x50)


class Rpm(Type):
    """
    Matcher for the RPM archive type.

    A buffer matches when it is longer than 96 bytes and starts
    with the bytes ED AB EE DB.
    """
    MIME = 'application/x-rpm'
    EXTENSION = 'rpm'

    def __init__(self):
        super(Rpm, self).__init__(mime=Rpm.MIME, extension=Rpm.EXTENSION)

    def match(self, buf):
        # Anything of 96 bytes or fewer is rejected before the
        # signature is looked at.
        if len(buf) <= 96:
            return False
        magic = (0xED, 0xAB, 0xEE, 0xDB)
        return all(buf[pos] == byte for pos, byte in enumerate(magic))


class Elf(Type):
    """
    Matcher for the ELF type.

    A buffer matches when it is longer than 52 bytes and starts
    with the bytes 7F 45 4C 46 (0x7F followed by ASCII "ELF").
    """
    MIME = 'application/x-executable'
    EXTENSION = 'elf'

    def __init__(self):
        super(Elf, self).__init__(mime=Elf.MIME, extension=Elf.EXTENSION)

    def match(self, buf):
        # Anything of 52 bytes or fewer is rejected before the
        # signature is looked at.
        if len(buf) <= 52:
            return False
        magic = (0x7F, 0x45, 0x4C, 0x46)
        return all(buf[pos] == byte for pos, byte in enumerate(magic))


class Dcm(Type):
    """
    Matcher for the DCM (DICOM) type.

    A buffer matches when bytes 128..131 are 44 49 43 4D
    (ASCII "DICM").
    """
    MIME = 'application/dicom'
    EXTENSION = 'dcm'

    def __init__(self):
        super(Dcm, self).__init__(mime=Dcm.MIME, extension=Dcm.EXTENSION)

    def match(self, buf):
        # The buffer must be long enough to hold index 131.
        if len(buf) <= 131:
            return False
        offset = 128
        marker = (0x44, 0x49, 0x43, 0x4D)  # ASCII "DICM"
        return all(buf[offset + k] == byte
                   for k, byte in enumerate(marker))


class Iso(Type):
    """
    Matcher for the ISO archive type.

    A buffer matches when bytes 32769..32773 are 43 44 30 30 31
    (ASCII "CD001").
    """
    MIME = 'application/x-iso9660-image'
    EXTENSION = 'iso'

    def __init__(self):
        super(Iso, self).__init__(mime=Iso.MIME, extension=Iso.EXTENSION)

    def match(self, buf):
        # The buffer must be long enough to hold index 32773.
        if len(buf) <= 32773:
            return False
        offset = 32769
        marker = (0x43, 0x44, 0x30, 0x30, 0x31)  # ASCII "CD001"
        return all(buf[offset + k] == byte
                   for k, byte in enumerate(marker))
21 changes: 21 additions & 0 deletions filetype/types/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,24 @@ def match(self, buf):
buf[3] == 0x4D and
buf[4] == 0x52 and
buf[5] == 0x0A)


class Aac(Type):
    """
    Matcher for the AAC audio type.

    A buffer matches when its first byte is FF and its second
    byte is either F1 or F9.
    """
    MIME = 'audio/aac'
    EXTENSION = 'aac'

    def __init__(self):
        super(Aac, self).__init__(mime=Aac.MIME, extension=Aac.EXTENSION)

    def match(self, buf):
        # Two bytes are needed to inspect the header.
        if len(buf) <= 1:
            return False
        # Both accepted signatures share the leading 0xFF byte.
        return buf[0] == 0xFF and buf[1] in (0xF1, 0xF9)
Loading