From ef5ad90f3aba98ae3e222f6b076377701997585b Mon Sep 17 00:00:00 2001 From: Zheng Ruoqin Date: Fri, 5 May 2017 02:23:24 +0900 Subject: [PATCH] Modified the magic.py for dosocs2 to fix the error as fowllowing. AttributeError: 'module' object has no attribute 'from_file' Signed-off-by: Zheng Ruoqin --- magic.py | 462 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 262 insertions(+), 200 deletions(-) diff --git a/magic.py b/magic.py index a17e8da..c6142a7 100644 --- a/magic.py +++ b/magic.py @@ -1,221 +1,283 @@ -#!/usr/bin/env python -''' -Python bindings for libmagic -''' +""" +magic is a wrapper around the libmagic file identification library. -import ctypes +See README for more information. -from ctypes import * -from ctypes.util import find_library +Usage: +>>> import magic +>>> magic.from_file("testdata/test.pdf") +'PDF document, version 1.2' +>>> magic.from_file("testdata/test.pdf", mime=True) +'application/pdf' +>>> magic.from_buffer(open("testdata/test.pdf").read(1024)) +'PDF document, version 1.2' +>>> -def _init(): - """ - Loads the shared library through ctypes and returns a library - L{ctypes.CDLL} instance - """ - return ctypes.cdll.LoadLibrary(find_library('magic')) - -_libraries = {} -_libraries['magic'] = _init() - -# Flag constants for open and setflags -MAGIC_NONE = NONE = 0 -MAGIC_DEBUG = DEBUG = 1 -MAGIC_SYMLINK = SYMLINK = 2 -MAGIC_COMPRESS = COMPRESS = 4 -MAGIC_DEVICES = DEVICES = 8 -MAGIC_MIME_TYPE = MIME_TYPE = 16 -MAGIC_CONTINUE = CONTINUE = 32 -MAGIC_CHECK = CHECK = 64 -MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 128 -MAGIC_RAW = RAW = 256 -MAGIC_ERROR = ERROR = 512 -MAGIC_MIME_ENCODING = MIME_ENCODING = 1024 -MAGIC_MIME = MIME = 1040 -MAGIC_APPLE = APPLE = 2048 - -MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 4096 -MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 8192 -MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 16384 -MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 32768 -MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 65536 -MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 131072 -MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 262144 -MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 1048576 -MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 2097152 - -MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 4173824 - - -class magic_set(Structure): - pass -magic_set._fields_ = [] -magic_t = POINTER(magic_set) - -_open = _libraries['magic'].magic_open -_open.restype = magic_t -_open.argtypes = [c_int] - -_close = _libraries['magic'].magic_close -_close.restype = None -_close.argtypes = [magic_t] - -_file = _libraries['magic'].magic_file -_file.restype = c_char_p -_file.argtypes = [magic_t, c_char_p] - -_descriptor = _libraries['magic'].magic_descriptor -_descriptor.restype = c_char_p -_descriptor.argtypes = [magic_t, c_int] - -_buffer = _libraries['magic'].magic_buffer -_buffer.restype = c_char_p -_buffer.argtypes = [magic_t, c_void_p, c_size_t] - -_error = _libraries['magic'].magic_error -_error.restype = c_char_p -_error.argtypes = [magic_t] - -_setflags = _libraries['magic'].magic_setflags -_setflags.restype = c_int -_setflags.argtypes = [magic_t, c_int] - -_load = _libraries['magic'].magic_load -_load.restype = c_int -_load.argtypes = [magic_t, c_char_p] - -_compile = _libraries['magic'].magic_compile -_compile.restype = c_int -_compile.argtypes = [magic_t, c_char_p] - -_check = _libraries['magic'].magic_check -_check.restype = c_int -_check.argtypes = [magic_t, c_char_p] - -_list = _libraries['magic'].magic_list -_list.restype = c_int -_list.argtypes = [magic_t, c_char_p] - -_errno = _libraries['magic'].magic_errno -_errno.restype = c_int -_errno.argtypes = [magic_t] - - -class Magic(object): - def __init__(self, ms): - self._magic_t = ms - - def close(self): - """ - Closes the magic database and deallocates any resources used. - """ - _close(self._magic_t) - def file(self, filename): - """ - Returns a textual description of the contents of the argument passed - as a filename or None if an error occurred and the MAGIC_ERROR flag - is set. A call to errno() will return the numeric error code. - """ - try: # attempt python3 approach first - if isinstance(filename, bytes): - bi = filename - else: - bi = bytes(filename, 'utf-8') - return str(_file(self._magic_t, bi), 'utf-8') - except: - return _file(self._magic_t, filename.encode('utf-8')) - - def descriptor(self, fd): - """ - Like the file method, but the argument is a file descriptor. - """ - return _descriptor(self._magic_t, fd) +""" - def buffer(self, buf): - """ - Returns a textual description of the contents of the argument passed - as a buffer or None if an error occurred and the MAGIC_ERROR flag - is set. A call to errno() will return the numeric error code. - """ - try: # attempt python3 approach first - return str(_buffer(self._magic_t, buf, len(buf)), 'utf-8') - except: - return _buffer(self._magic_t, buf, len(buf)) +import sys +import glob +import os.path +import ctypes +import ctypes.util +import threading - def error(self): - """ - Returns a textual explanation of the last error or None - if there was no error. - """ - try: # attempt python3 approach first - return str(_error(self._magic_t), 'utf-8') - except: - return _error(self._magic_t) +from ctypes import c_char_p, c_int, c_size_t, c_void_p - def setflags(self, flags): - """ - Set flags on the magic object which determine how magic checking - behaves; a bitwise OR of the flags described in libmagic(3), but - without the MAGIC_ prefix. - Returns -1 on systems that don't support utime(2) or utimes(2) - when PRESERVE_ATIME is set. - """ - return _setflags(self._magic_t, flags) +class MagicException(Exception): + def __init__(self, message): + super(MagicException, self).__init__(message) + self.message = message - def load(self, filename=None): - """ - Must be called to load entries in the colon separated list of database - files passed as argument or the default database file if no argument - before any magic queries can be performed. - Returns 0 on success and -1 on failure. - """ - return _load(self._magic_t, filename) +class Magic: + """ + Magic is a wrapper around the libmagic C library. - def compile(self, dbs): - """ - Compile entries in the colon separated list of database files - passed as argument or the default database file if no argument. - Returns 0 on success and -1 on failure. - The compiled files created are named from the basename(1) of each file - argument with ".mgc" appended to it. - """ - return _compile(self._magic_t, dbs) + """ - def check(self, dbs): - """ - Check the validity of entries in the colon separated list of - database files passed as argument or the default database file - if no argument. - Returns 0 on success and -1 on failure. + def __init__(self, mime=False, magic_file=None, mime_encoding=False, + keep_going=False, uncompress=False): """ - return _check(self._magic_t, dbs) + Create a new libmagic wrapper. - def list(self, dbs): + mime - if True, mimetypes are returned instead of textual descriptions + mime_encoding - if True, codec is returned + magic_file - use a mime database other than the system default + keep_going - don't stop at the first match, keep going + uncompress - Try to look inside compressed files. """ - Check the validity of entries in the colon separated list of - database files passed as argument or the default database file - if no argument. - Returns 0 on success and -1 on failure. - """ - return _list(self._magic_t, dbs) - - def errno(self): + self.flags = MAGIC_NONE + if mime: + self.flags |= MAGIC_MIME + elif mime_encoding: + self.flags |= MAGIC_MIME_ENCODING + if keep_going: + self.flags |= MAGIC_CONTINUE + + if uncompress: + self.flags |= MAGIC_COMPRESS + + self.cookie = magic_open(self.flags) + self.lock = threading.Lock() + + magic_load(self.cookie, magic_file) + + def from_buffer(self, buf): """ - Returns a numeric error code. If return value is 0, an internal - magic error occurred. If return value is non-zero, the value is - an OS error code. Use the errno module or os.strerror() can be used - to provide detailed error information. + Identify the contents of `buf` """ - return _errno(self._magic_t) - + with self.lock: + try: + return magic_buffer(self.cookie, buf) + except MagicException as e: + return self._handle509Bug(e) + + def from_file(self, filename): + # raise FileNotFoundException or IOError if the file does not exist + with open(filename): + pass + with self.lock: + try: + return magic_file(self.cookie, filename) + except MagicException as e: + return self._handle509Bug(e) + + def _handle509Bug(self, e): + # libmagic 5.09 has a bug where it might fail to identify the + # mimetype of a file and returns null from magic_file (and + # likely _buffer), but also does not return an error message. + if e.message is None and (self.flags & MAGIC_MIME): + return "application/octet-stream" + + def __del__(self): + # no _thread_check here because there can be no other + # references to this object at this point. + + # during shutdown magic_close may have been cleared already so + # make sure it exists before using it. + + # the self.cookie check should be unnecessary and was an + # incorrect fix for a threading problem, however I'm leaving + # it in because it's harmless and I'm slightly afraid to + # remove it. + if self.cookie and magic_close: + magic_close(self.cookie) + self.cookie = None + +_instances = {} + +def _get_magic_type(mime): + i = _instances.get(mime) + if i is None: + i = _instances[mime] = Magic(mime=mime) + return i + +def from_file(filename, mime=False): + """" + Accepts a filename and returns the detected filetype. Return + value is the mimetype if mime=True, otherwise a human readable + name. + + >>> magic.from_file("testdata/test.pdf", mime=True) + 'application/pdf' + """ + m = _get_magic_type(mime) + return m.from_file(filename) -def open(flags): +def from_buffer(buffer, mime=False): """ - Returns a magic object on success and None on failure. - Flags argument as for setflags. + Accepts a binary string and returns the detected filetype. Return + value is the mimetype if mime=True, otherwise a human readable + name. + + >>> magic.from_buffer(open("testdata/test.pdf").read(1024)) + 'PDF document, version 1.2' """ - return Magic(_open(flags)) + m = _get_magic_type(mime) + return m.from_buffer(buffer) + + + + +libmagic = None +# Let's try to find magic or magic1 +dll = ctypes.util.find_library('magic') or ctypes.util.find_library('magic1') or ctypes.util.find_library('cygmagic-1') + +# This is necessary because find_library returns None if it doesn't find the library +if dll: + libmagic = ctypes.CDLL(dll) + +if not libmagic or not libmagic._name: + windows_dlls = ['magic1.dll','cygmagic-1.dll'] + platform_to_lib = {'darwin': ['/opt/local/lib/libmagic.dylib', + '/usr/local/lib/libmagic.dylib'] + + # Assumes there will only be one version installed + glob.glob('/usr/local/Cellar/libmagic/*/lib/libmagic.dylib'), + 'win32': windows_dlls, + 'cygwin': windows_dlls } + for dll in platform_to_lib.get(sys.platform, []): + try: + libmagic = ctypes.CDLL(dll) + break + except OSError: + pass + +if not libmagic or not libmagic._name: + # It is better to raise an ImportError since we are importing magic module + raise ImportError('failed to find libmagic. Check your installation') + +magic_t = ctypes.c_void_p + +def errorcheck_null(result, func, args): + if result is None: + err = magic_error(args[0]) + raise MagicException(err) + else: + return result + +def errorcheck_negative_one(result, func, args): + if result is -1: + err = magic_error(args[0]) + raise MagicException(err) + else: + return result + + +def coerce_filename(filename): + if filename is None: + return None + + # ctypes will implicitly convert unicode strings to bytes with + # .encode('ascii'). If you use the filesystem encoding + # then you'll get inconsistent behavior (crashes) depending on the user's + # LANG environment variable + is_unicode = (sys.version_info[0] <= 2 and + isinstance(filename, unicode)) or \ + (sys.version_info[0] >= 3 and + isinstance(filename, str)) + if is_unicode: + return filename.encode('utf-8') + else: + return filename + +magic_open = libmagic.magic_open +magic_open.restype = magic_t +magic_open.argtypes = [c_int] + +magic_close = libmagic.magic_close +magic_close.restype = None +magic_close.argtypes = [magic_t] + +magic_error = libmagic.magic_error +magic_error.restype = c_char_p +magic_error.argtypes = [magic_t] + +magic_errno = libmagic.magic_errno +magic_errno.restype = c_int +magic_errno.argtypes = [magic_t] + +_magic_file = libmagic.magic_file +_magic_file.restype = c_char_p +_magic_file.argtypes = [magic_t, c_char_p] +_magic_file.errcheck = errorcheck_null + +def magic_file(cookie, filename): + return _magic_file(cookie, coerce_filename(filename)) + +_magic_buffer = libmagic.magic_buffer +_magic_buffer.restype = c_char_p +_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t] +_magic_buffer.errcheck = errorcheck_null + +def magic_buffer(cookie, buf): + return _magic_buffer(cookie, buf, len(buf)) + + +_magic_load = libmagic.magic_load +_magic_load.restype = c_int +_magic_load.argtypes = [magic_t, c_char_p] +_magic_load.errcheck = errorcheck_negative_one + +def magic_load(cookie, filename): + return _magic_load(cookie, coerce_filename(filename)) + +magic_setflags = libmagic.magic_setflags +magic_setflags.restype = c_int +magic_setflags.argtypes = [magic_t, c_int] + +magic_check = libmagic.magic_check +magic_check.restype = c_int +magic_check.argtypes = [magic_t, c_char_p] + +magic_compile = libmagic.magic_compile +magic_compile.restype = c_int +magic_compile.argtypes = [magic_t, c_char_p] + + + +MAGIC_NONE = 0x000000 # No flags +MAGIC_DEBUG = 0x000001 # Turn on debugging +MAGIC_SYMLINK = 0x000002 # Follow symlinks +MAGIC_COMPRESS = 0x000004 # Check inside compressed files +MAGIC_DEVICES = 0x000008 # Look at the contents of devices +MAGIC_MIME = 0x000010 # Return a mime string +MAGIC_MIME_ENCODING = 0x000400 # Return the MIME encoding +MAGIC_CONTINUE = 0x000020 # Return all matches +MAGIC_CHECK = 0x000040 # Print warnings to stderr +MAGIC_PRESERVE_ATIME = 0x000080 # Restore access time on exit +MAGIC_RAW = 0x000100 # Don't translate unprintable chars +MAGIC_ERROR = 0x000200 # Handle ENOENT etc as real errors + +MAGIC_NO_CHECK_COMPRESS = 0x001000 # Don't check for compressed files +MAGIC_NO_CHECK_TAR = 0x002000 # Don't check for tar files +MAGIC_NO_CHECK_SOFT = 0x004000 # Don't check magic entries +MAGIC_NO_CHECK_APPTYPE = 0x008000 # Don't check application type +MAGIC_NO_CHECK_ELF = 0x010000 # Don't check for elf details +MAGIC_NO_CHECK_ASCII = 0x020000 # Don't check for ascii files +MAGIC_NO_CHECK_TROFF = 0x040000 # Don't check ascii/troff +MAGIC_NO_CHECK_FORTRAN = 0x080000 # Don't check ascii/fortran +MAGIC_NO_CHECK_TOKENS = 0x100000 # Don't check ascii/tokens -- 1.8.4.2