Class | Ole::Storage::MetaData |
In: |
lib/ole/storage/meta_data.rb
|
Parent: | Object |
The MetaData class is designed to be a high-level interface to all the underlying meta data stored within different sections, which are themselves stored within different property set streams.
With this class, you can simply get properties using their names, without needing to know about the underlying guids, property ids etc.
Example:
Ole::Storage.open('test.doc') { |ole| p ole.meta_data.doc_author }
TODO:
# Property set format id => name of the stream that is looked up in the
# root storage for that property set.
FILE_MAP = {
  Types::PropertySet::FMTID_SummaryInformation => "\005SummaryInformation",
  Types::PropertySet::FMTID_DocSummaryInfo => "\005DocumentSummaryInformation"
}

# File format name (as found in the CompObj stream) => short type symbol.
FORMAT_MAP = {
  'MSWordDoc' => :doc
}

# Root clsids of the known document types.
CLSID_EXCEL97 = Types::Clsid.parse "{00020820-0000-0000-c000-000000000046}"
CLSID_EXCEL95 = Types::Clsid.parse "{00020810-0000-0000-c000-000000000046}"
CLSID_WORD97 = Types::Clsid.parse "{00020906-0000-0000-c000-000000000046}"
CLSID_WORD95 = Types::Clsid.parse "{00020900-0000-0000-c000-000000000046}"

# Root clsid => short type symbol.
CLSID_MAP = {
  CLSID_EXCEL97 => :xls,
  CLSID_EXCEL95 => :xls,
  CLSID_WORD97 => :doc,
  CLSID_WORD95 => :doc
}

# Short type symbol => mime type string. The nil key holds the fallback.
MIME_TYPES = {
  :xls => 'application/vnd.ms-excel',
  :doc => 'application/msword',
  :ppt => 'application/vnd.ms-powerpoint',
  # not registered at IANA, but seems most common usage
  :msg => 'application/vnd.ms-outlook',
  # this is my default fallback option. also not registered at IANA.
  # file(1)'s default is application/msword, which is useless...
  nil => 'application/x-ole-storage'
}
# File lib/ole/storage/meta_data.rb, line 114
#
# Looks up a single property by name.
#
# The name is resolved through Types::PropertySet::PROPERTY_MAP; the first
# element of the mapped pair selects (via FILE_MAP) the stream to read.
# Returns nil if the name is not mapped, if no stream is registered for it,
# or if the stream is not present under the root storage.
def [](key)
  pair = Types::PropertySet::PROPERTY_MAP[key.to_s]
  return nil unless pair

  stream_name = FILE_MAP[pair.first]
  return nil unless stream_name

  dirent = @ole.root[stream_name]
  return nil unless dirent

  # the explicit return hands the property value straight back to the caller
  dirent.open do |io|
    return Types::PropertySet.new(io)[key]
  end
end
# File lib/ole/storage/meta_data.rb, line 121
#
# Assignment is not supported: whatever key and value are passed, this
# always raises NotImplementedError.
def []=(key, value)
  raise NotImplementedError, 'meta data writes not implemented'
end
# File lib/ole/storage/meta_data.rb, line 125
#
# Walks every stream named in FILE_MAP that exists under the root storage,
# and passes the given block on to the #each of the property set parsed
# from that stream. Streams that are absent are skipped.
#
# When called without a block, returns an Enumerator over the same items
# instead of forwarding a nil block to the property sets.
def each(&block)
  return enum_for(:each) unless block

  FILE_MAP.values.each do |file|
    dirent = @ole.root[file]
    next unless dirent

    dirent.open { |io| Types::PropertySet.new(io).each(&block) }
  end
end
# File lib/ole/storage/meta_data.rb, line 136
#
# Provides reader-style access to properties: a call with no arguments whose
# name is a key of Types::PropertySet::PROPERTY_MAP is answered with
# self[name]. Anything else (arguments given, or an unmapped name) falls
# through to the default method_missing handling.
def method_missing(name, *args, &block)
  return super unless args.empty?
  return super unless Types::PropertySet::PROPERTY_MAP[name.to_s]

  self[name]
end

# Keeps respond_to? in agreement with method_missing: names that are mapped
# in Types::PropertySet::PROPERTY_MAP are reported as supported.
def respond_to_missing?(name, include_private = false)
  Types::PropertySet::PROPERTY_MAP[name.to_s] ? true : super
end
# File lib/ole/storage/meta_data.rb, line 96
#
# Picks a mime type string out of MIME_TYPES, trying three sources in turn:
# the file format (mapped through FORMAT_MAP), the root clsid (mapped
# through CLSID_MAP), and finally the names of the root's children. If none
# of them gives an answer, the entry stored under the nil key is returned.
def mime_type
  # based on the CompObj stream contents
  by_format = FORMAT_MAP[file_format]
  return MIME_TYPES[by_format] if by_format

  # based on the root clsid
  by_clsid = CLSID_MAP[Types::Clsid.load(@ole.root.clsid)]
  return MIME_TYPES[by_clsid] if by_clsid

  # fallback to heuristics: look for well-known child names, ignoring case
  has_file = {}
  @ole.root.children.each { |child| has_file[child.name.downcase] = true }

  if has_file['__nameid_version1.0'] || has_file['__properties_version1.0']
    return MIME_TYPES[:msg]
  end
  if has_file['worddocument'] || has_file['document']
    return MIME_TYPES[:doc]
  end
  if has_file['workbook'] || has_file['book']
    return MIME_TYPES[:xls]
  end

  MIME_TYPES[nil]
end
# File lib/ole/storage/meta_data.rb, line 132
#
# Folds every (name, value) pair produced by the enumeration into a hash,
# keyed by the name converted to a symbol.
def to_h
  inject({}) do |hash, (name, value)|
    hash[name.to_sym] = value
    hash
  end
end
I'm thinking of making file_format and mime_type available through #[], each, and to_h as well, as calculated meta data (not assignable).
# File lib/ole/storage/meta_data.rb, line 69
#
# Reads the "\001CompObj" stream of the root storage and splits it into its
# strings.
#
# Returns an empty hash when the stream does not exist. Otherwise returns a
# hash with :username (first string), :file_format (second string) and
# :unknown (all remaining strings).
def comp_obj
  dirent = @ole.root["\001CompObj"]
  return {} unless dirent

  data = dirent.read
  # see - https://gnunet.org/svn/Extractor/doc/StarWrite_File_Format.html
  # The stream starts with a fixed header, which is skipped:
  #   compobj_version: 0x0001      (16 bit)
  #   byte_order:      0xffe       (16 bit) NOTE(review): presumably 0xfffe
  #   windows_version: 0x00000a03  (32 bit, win31 apparently)
  #   marker:          0xffffffff  (32 bit)
  #   clsid
  # The strings begin at offset 28, ie 12 bytes of fields plus a clsid that
  # is taken to be 16 bytes long.
  offset = 28
  strings = []
  # Each entry is a 32 bit little endian length followed by that many bytes;
  # the last byte of each entry is dropped (presumably a NUL terminator).
  # Stop once fewer than 4 bytes remain: a partial length prefix cannot be
  # decoded, and previously caused a NoMethodError on nil.
  while offset + 4 <= data.length
    len = data[offset, 4].unpack('V').first
    offset += 4
    strings << data[offset, len - 1]
    offset += len
  end
  # in the unknown chunk, you usually see something like 'Word.Document.6'
  {:username => strings[0], :file_format => strings[1], :unknown => strings[2..-1]}
end