Class Ole::Storage::MetaData
In: lib/ole/storage/meta_data.rb
Parent: Object
String Data Lpstr Clsid Lpwstr Section Enumerable DateTime FileTime Constants Variant::Constants Storage\n[lib/ole/storage/base.rb\nlib/ole/storage/file_system.rb\nlib/ole/storage/meta_data.rb] PropertySet lib/ole/storage/file_system.rb lib/ole/types/property_set.rb lib/ole/types/base.rb Constants Constants Variant Types Ole dot/m_9_0.png

The MetaData class is designed to be a high-level interface to all the underlying meta data stored within different sections, which are themselves stored within different property set streams.

With this class, you can simply get properties using their names, without needing to know about the underlying GUIDs, property IDs, etc.

Example:

  Ole::Storage.open('test.doc') { |ole| p ole.meta_data.doc_author }

TODO:

  • add write support
  • fix some of the missing type coercion (e.g. FileTime)
  • maybe add back the ability to access individual property sets as a unit directly, i.e. ole.summary_information. Is this useful?
  • full key support for unknown keys, like ole.meta_data[myguid, myid]. This is probably needed for user-defined properties too.

Methods

[]   []=   comp_obj   each   file_format   method_missing   mime_type   new   to_h  

Included Modules

Enumerable

Constants

FILE_MAP = { Types::PropertySet::FMTID_SummaryInformation => "\005SummaryInformation", Types::PropertySet::FMTID_DocSummaryInfo => "\005DocumentSummaryInformation" }
FORMAT_MAP = { 'MSWordDoc' => :doc }
CLSID_EXCEL97 = Types::Clsid.parse "{00020820-0000-0000-c000-000000000046}"
CLSID_EXCEL95 = Types::Clsid.parse "{00020810-0000-0000-c000-000000000046}"
CLSID_WORD97 = Types::Clsid.parse "{00020906-0000-0000-c000-000000000046}"
CLSID_WORD95 = Types::Clsid.parse "{00020900-0000-0000-c000-000000000046}"
CLSID_MAP = { CLSID_EXCEL97 => :xls, CLSID_EXCEL95 => :xls, CLSID_WORD97 => :doc, CLSID_WORD95 => :doc }
MIME_TYPES = {
  :xls => 'application/vnd.ms-excel',
  :doc => 'application/msword',
  :ppt => 'application/vnd.ms-powerpoint',
  # not registered at IANA, but seems most common usage
  :msg => 'application/vnd.ms-outlook',
  # this is my default fallback option. also not registered at IANA.
  # file(1)'s default is application/msword, which is useless...
  nil => 'application/x-ole-storage'
}

Public Class methods

[Source]

    # File lib/ole/storage/meta_data.rb, line 62
62:                         def initialize ole
63:                                 @ole = ole
64:                         end

Public Instance methods

[Source]

     # File lib/ole/storage/meta_data.rb, line 114
114:                         def [] key
115:                                 pair = Types::PropertySet::PROPERTY_MAP[key.to_s] or return nil
116:                                 file = FILE_MAP[pair.first] or return nil
117:                                 dirent = @ole.root[file] or return nil
118:                                 dirent.open { |io| return Types::PropertySet.new(io)[key] }
119:                         end

[Source]

     # File lib/ole/storage/meta_data.rb, line 121
121:                         def []= key, value
122:                                 raise NotImplementedError, 'meta data writes not implemented'
123:                         end

[Source]

     # File lib/ole/storage/meta_data.rb, line 125
125:                         def each(&block)
126:                                 FILE_MAP.values.each do |file|
127:                                         dirent = @ole.root[file] or next
128:                                         dirent.open { |io| Types::PropertySet.new(io).each(&block) }
129:                                 end
130:                         end

[Source]

    # File lib/ole/storage/meta_data.rb, line 92
92:                         def file_format
93:                                 comp_obj[:file_format]
94:                         end

[Source]

     # File lib/ole/storage/meta_data.rb, line 136
136:                         def method_missing name, *args, &block
137:                                 return super unless args.empty?
138:                                 pair = Types::PropertySet::PROPERTY_MAP[name.to_s] or return super
139:                                 self[name]
140:                         end

[Source]

     # File lib/ole/storage/meta_data.rb, line 96
 96:                         def mime_type
 97:                                 # based on the CompObj stream contents
 98:                                 type = FORMAT_MAP[file_format]
 99:                                 return MIME_TYPES[type] if type
100: 
101:                                 # based on the root clsid
102:                                 type = CLSID_MAP[Types::Clsid.load(@ole.root.clsid)]
103:                                 return MIME_TYPES[type] if type
104: 
105:                                 # fallback to heuristics
106:                                 has_file = Hash[*@ole.root.children.map { |d| [d.name.downcase, true] }.flatten]
107:                                 return MIME_TYPES[:msg] if has_file['__nameid_version1.0'] or has_file['__properties_version1.0']
108:                                 return MIME_TYPES[:doc] if has_file['worddocument'] or has_file['document']
109:                                 return MIME_TYPES[:xls] if has_file['workbook'] or has_file['book']
110: 
111:                                 MIME_TYPES[nil]
112:                         end

[Source]

     # File lib/ole/storage/meta_data.rb, line 132
132:                         def to_h
133:                                 inject({}) { |hash, (name, value)| hash.update name.to_sym => value }
134:                         end

Private Instance methods

I'm thinking of making file_format and mime_type available through #[], #each, and #to_h as well, as calculated meta data (not assignable).

[Source]

    # File lib/ole/storage/meta_data.rb, line 69
69:                         def comp_obj
70:                                 return {} unless dirent = @ole.root["\001CompObj"]
71:                                 data = dirent.read
72:                                 # see - https://gnunet.org/svn/Extractor/doc/StarWrite_File_Format.html
73:                                 # compobj_version: 0x0001
74:                                 # byte_order: 0xffe
75:                                 # windows_version: 0x00000a03 (win31 apparently)
76:                                 # marker: 0xffffffff
77:                                 compobj_version, byte_order, windows_version, marker, clsid =
78:                                         data.unpack("vvVVa#{Types::Clsid::SIZE}")
79:                                 strings = []
80:                                 i = 28
81:                                 while i < data.length
82:                                         len = data[i, 4].unpack('V').first
83:                                         i += 4
84:                                         strings << data[i, len - 1]
85:                                         i += len
86:                                 end
87:                                 # in the unknown chunk, you usually see something like 'Word.Document.6'
88:                                 {:username => strings[0], :file_format => strings[1], :unknown => strings[2..-1]}
89:                         end

[Validate]