Class | Ole::Storage |
In: |
lib/ole/storage/meta_data.rb
lib/ole/storage/base.rb lib/ole/storage/file_system.rb |
Parent: | Object |
This class is the primary way the user interacts with an OLE storage file.
VERSION | = | '1.2.9' |
bbat | [R] | Low level internals, you probably shouldn't need to mess with these |
close_parent | [R] | Whether the underlying io object (see io) should be closed when this ole object is closed |
dirents | [R] | The tree structure in its original flattened form. Only valid after load or flush. |
header | [R] | Low level internals, you probably shouldn't need to mess with these |
io | [R] | The underlying io object to/from which the ole object is serialized |
params | [R] | options used at creation time |
root | [R] | The top of the ole tree structure |
sb_file | [R] | Low level internals, you probably shouldn't need to mess with these |
sbat | [R] | Low level internals, you probably shouldn't need to mess with these |
writeable | [R] | Whether the underlying io object (see io) is writeable |
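arg should be either a filename or an IO object, and the IO needs to be seekable. mode is optional and should be a regular mode string; it can only be given together with a filename (passing a mode along with an IO object raises ArgumentError).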
# File lib/ole/storage/base.rb, line 40
#
# Opens or wraps the io that backs this ole storage, works out whether it is
# writeable, and then either loads the existing document or starts a new one.
#
# +arg+ is either a filename (String) or an io object. +mode+ is an optional
# mode string and is only accepted together with a filename (defaults to 'rb').
# +params+ is an options hash; :update_timestamps defaults to true. A Hash
# passed in the +mode+ position is treated as +params+.
#
# Raises ArgumentError if a mode string is given together with an io object.
def initialize arg, mode=nil, params={}
  params, mode = mode, nil if Hash === mode
  params = {:update_timestamps => true}.merge(params)
  @params = params

  # get the io object
  @close_parent, @io = if String === arg
    mode ||= 'rb'
    # File.open rather than Kernel#open: the latter treats a name starting
    # with "|" as a command to run, which is never wanted for a filename.
    [true, File.open(arg, mode)]
  else
    raise ArgumentError, 'unable to specify mode string with io object' if mode
    [false, arg]
  end
  # do we have this file opened for writing? don't know of a better way to tell
  # (unless we parse the mode string in the open case)
  # hmmm, note that in ruby 1.9 this doesn't work anymore. which is all the more
  # reason to use mode string parsing when available, and fall back to something like
  # io.writeable? otherwise.
  @writeable = begin
    if mode
      IO::Mode.new(mode).writeable?
    else
      @io.flush
      # this is for the benefit of ruby-1.9
      # generates warnings on jruby though... :/
      if RUBY_PLATFORM != 'java' && @io.respond_to?(:syswrite)
        @io.syswrite('')
      end
      true
    end
  rescue IOError
    false
  end
  # silence undefined warning in clear
  @sb_file = nil
  # if the io object has data, we should load it, otherwise start afresh
  # this should be based on the mode string rather.
  @io.size > 0 ? load : clear
end
Somewhat similar to File.open, the open class method allows a block form, where the Ole::Storage object is automatically closed on completion of the block.
# File lib/ole/storage/base.rb, line 82
#
# Builds a new storage from +arg+, +mode+ and +params+ (same meaning as for
# new). Without a block the storage is returned. With a block the storage is
# yielded, closed once the block finishes (even if it raises), and the
# block's value is returned.
def self.open arg, mode=nil, params={}
  storage = new arg, mode, params
  return storage unless block_given?

  begin
    yield storage
  ensure
    storage.close
  end
end
# File lib/ole/storage/base.rb, line 328
#
# Picks the allocation table for a stream of +size+ bytes: the big one
# (@bbat) when size is at or above the header threshold, otherwise the
# small one (@sbat).
def bat_for_size size
  # note >=, not > previously.
  return @bbat if size >= @header.threshold
  @sbat
end
# File lib/ole/storage/base.rb, line 290
#
# Resets the storage to the equivalent of having loaded an empty ole
# document: fresh header, fresh allocation tables, a lone root dirent, and
# the underlying io truncated to zero length.
def clear
  # a warning only - clearing still goes ahead on a non-writable io
  Log.warn('creating new ole storage object on non-writable io') unless @writeable

  @header = Header.new
  @bbat = AllocationTable::Big.new(self)

  # the directory consists of just the root entry, at index 0
  @root = Dirent.new(self, :type => :root, :name => 'Root Entry')
  @dirents = [@root]
  @root.idx = 0

  # drop any previous small-block file before creating the empty replacement
  @sb_file.close if @sb_file
  @sb_file = RangesIOResizeable.new(@bbat, :first_block => AllocationTable::EOC)
  @sbat = AllocationTable::Small.new(self)

  # throw everything else the hell away
  @io.truncate(0)
end
# File lib/ole/storage/base.rb, line 158
#
# Closes the storage: closes the small-block file, flushes the meta data if
# the storage is writeable, and closes the underlying io when this object
# owns it (@close_parent).
#
# The io is closed from an ensure clause, so a failure while closing the
# small-block file or while flushing no longer leaks the file handle that
# this object opened; the original exception still propagates.
#
# Returns nil.
def close
  @sb_file.close
  flush if @writeable
  nil
ensure
  @io.close if @close_parent
end
Tries to get a dirent for path. Returns nil if it doesn't exist (change it).
# File lib/ole/storage/file_system.rb, line 46
#
# Resolves +path+ to a dirent by walking down from the root, one path
# segment at a time. Returns the dirent, or nil when a segment is missing or
# when the walk would have to descend through a file.
def dirent_from_path path
  # expand, strip leading/trailing slashes, then break into segments
  segments = file.expand_path(path).sub(/^\/*/, '').sub(/\/*$/, '').split(/\/+/)

  current = @root
  segments.each do |segment|
    # cannot descend into a file
    return nil if current.file?
    current = current / segment
    return nil unless current
  end
  current
end
# File lib/ole/storage/file_system.rb, line 36
#
# Returns the FileClass instance for this storage, creating it on first use
# and reusing it afterwards.
def file
  return @file if @file
  @file = FileClass.new(self)
end
The flush method is the main "save" method. All file contents are always written directly to the file by the RangesIO objects; all this method does is write out the file meta data - dirents, allocation tables, file header etc.
maybe add an option to zero the padding, and any remaining avail blocks in the allocation table.
TODO: long and overly complex. simplify and test better. eg, perhaps move serialization of bbat to AllocationTable::Big.
# File lib/ole/storage/base.rb, line 174
#
# Writes out the storage meta data, in this order: the flattened dirents,
# the sbat, the bbat (plus any mbat blocks needed to hold the bbat chain
# beyond the 109 entries kept in the header block), and finally the header
# itself at the start of the io. Ends by flushing the io.
def flush
  # update root dirent, and flatten dirent tree
  @root.name = 'Root Entry'
  @root.first_block = @sb_file.first_block
  @root.size = @sb_file.size
  @dirents = @root.flatten

  # serialize the dirents using the bbat
  RangesIOResizeable.open @bbat, 'w', :first_block => @header.dirent_start do |io|
    @dirents.each { |dirent| io.write dirent.to_s }
    # zero-pad the dirent data out to a whole number of bbat blocks
    padding = (io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - io.size
    io.write 0.chr * padding
    @header.dirent_start = io.first_block
  end

  # serialize the sbat
  # perhaps the blocks used by the sbat should be marked with BAT?
  RangesIOResizeable.open @bbat, 'w', :first_block => @header.sbat_start do |io|
    io.write @sbat.to_s
    @header.sbat_start = io.first_block
    @header.num_sbat = @bbat.chain(@header.sbat_start).length
  end

  # create RangesIOResizeable hooked up to the bbat. use that to claim bbat blocks using
  # truncate. then when its time to write, convert that chain and some chunk of blocks at
  # the end, into META_BAT blocks. write out the chain, and those meta bat blocks, and its
  # done.
  # this is perhaps not good, as we reclaim all bat blocks here, which
  # may include the sbat we just wrote. FIXME
  @bbat.map! do |b|
    b == AllocationTable::BAT || b == AllocationTable::META_BAT ? AllocationTable::AVAIL : b
  end

  # currently we use a loop. this could be better, but basically,
  # the act of writing out the bat, itself requires blocks which get
  # recorded in the bat.
  #
  # i'm sure that there'd be some simpler closed form solution to this. solve
  # recursive func:
  #
  #   num_mbat_blocks = ceil(max((mbat_len - 109) * 4 / block_size, 0))
  #   bbat_len = initial_bbat_len + num_mbat_blocks
  #   mbat_len = ceil(bbat_len * 4 / block_size)
  #
  # the actual bbat allocation table is itself stored throughout the file, and that chain
  # is stored in the initial blocks, and the mbat blocks.
  num_mbat_blocks = 0
  io = RangesIOResizeable.new @bbat, 'w', :first_block => AllocationTable::EOC
  # truncate now, so that we can simplify size calcs - the mbat blocks will be appended in a
  # contiguous chunk at the end.
  # hmmm, i think this truncate should be matched with a truncate of the underlying io. if you
  # delete a lot of stuff, and free up trailing blocks, the file size never shrinks. this can
  # be fixed easily, add an io truncate
  @bbat.truncate!
  # NOTE(review): `before` is assigned here but not read again within this method.
  before = @io.size
  @io.truncate @bbat.block_size * (@bbat.length + 1)
  # iterate until neither the mbat block count nor the bat size needs to change
  while true
    # get total bbat size. equivalent to @bbat.to_s.length, but for the factoring in of
    # the mbat blocks. we can't just add the mbat blocks directly to the bbat, as as this iteration
    # progresses, more blocks may be needed for the bat itself (if there are no more gaps), and the
    # mbat must remain contiguous.
    bbat_data_len = ((@bbat.length + num_mbat_blocks) * 4 / @bbat.block_size.to_f).ceil * @bbat.block_size
    # now storing the excess mbat blocks also increases the size of the bbat:
    new_num_mbat_blocks = ([bbat_data_len / @bbat.block_size - 109, 0].max * 4 / (@bbat.block_size.to_f - 4)).ceil
    if new_num_mbat_blocks != num_mbat_blocks
      # need more space for the mbat.
      num_mbat_blocks = new_num_mbat_blocks
    elsif io.size != bbat_data_len
      # need more space for the bat
      # this may grow the bbat, depending on existing available blocks
      io.truncate bbat_data_len
    else
      break
    end
  end

  # now extract the info we want:
  ranges = io.ranges
  bbat_chain = @bbat.chain io.first_block
  io.close
  bbat_chain.each { |b| @bbat[b] = AllocationTable::BAT }
  # tack on the mbat stuff
  @header.num_bat = bbat_chain.length
  mbat_blocks = (0...num_mbat_blocks).map do
    block = @bbat.free_block
    @bbat[block] = AllocationTable::META_BAT
    block
  end
  @header.mbat_start = mbat_blocks.first || AllocationTable::EOC

  # now finally write the bbat, using a not resizable io.
  # the mode here will be 'r', which allows write atm.
  RangesIO.open(@io, :ranges => ranges) { |f| f.write @bbat.to_s }

  # this is the mbat. pad it out.
  bbat_chain += [AllocationTable::AVAIL] * [109 - bbat_chain.length, 0].max
  @header.num_mbat = num_mbat_blocks
  if num_mbat_blocks != 0
    # write out the mbat blocks now. first of all, where are they going to be?
    # (entries beyond the first 109 do not go in the header block written below)
    mbat_data = bbat_chain[109..-1]
    # expand the mbat_data to include the linked list forward pointers.
    mbat_data = mbat_data.to_enum(:each_slice, @bbat.block_size / 4 - 1).to_a.
      zip(mbat_blocks[1..-1] + [nil]).map { |a, b| b ? a + [b] : a }
    # pad out the last one.
    mbat_data.last.push(*([AllocationTable::AVAIL] * (@bbat.block_size / 4 - mbat_data.last.length)))
    RangesIO.open @io, :ranges => @bbat.ranges(mbat_blocks) do |f|
      f.write mbat_data.flatten.pack('V*')
    end
  end

  # now seek back and write the header out
  @io.seek 0
  @io.write @header.to_s + bbat_chain[0, 109].pack('V*')
  @io.flush
end
# File lib/ole/storage/base.rb, line 333
#
# Short human-readable summary: the class name plus the inspected io and
# root dirent.
def inspect
  io_desc = @io.inspect
  root_desc = @root.inspect
  "#<#{self.class} io=#{io_desc} root=#{root_desc}>"
end
TODO: implement various allocation table checks, maybe as an AllocationTable#fsck function :)
# File lib/ole/storage/base.rb, line 105
#
# Reads an existing ole document from @io: parses the header, rebuilds the
# bbat from the chain held in the header block and any mbat blocks, reads the
# dirents and links them into a tree under @root, then sets up the
# small-block file and the sbat.
#
# Raises FormatError if a dirent is reached twice while building the tree.
def load
  # we always read 512 for the header block. if the block size ends up being different,
  # what happens to the 109 fat entries. are there more/less entries?
  @io.rewind
  header_block = @io.read 512
  @header = Header.new header_block

  # create an empty bbat.
  @bbat = AllocationTable::Big.new self
  # the remainder of the header block, after Header::SIZE bytes, is the start of the bbat chain
  bbat_chain = header_block[Header::SIZE..-1].unpack 'V*'
  mbat_block = @header.mbat_start
  @header.num_mbat.times do
    blocks = @bbat.read([mbat_block]).unpack 'V*'
    # the last entry of each mbat block is the next mbat block, not a chain entry
    mbat_block = blocks.pop
    bbat_chain += blocks
  end
  # am i using num_bat in the right way?
  @bbat.load @bbat.read(bbat_chain[0, @header.num_bat])

  # get block chain for directories, read it, then split it into chunks and load the
  # directory entries. semantics changed - used to cut at first dir where dir.type == 0
  @dirents = @bbat.read(@header.dirent_start).to_enum(:each_chunk, Dirent::SIZE).
    map { |str| Dirent.new self, str }.reject { |d| d.type_id == 0 }

  # now reorder from flat into a tree
  # links are stored in some kind of balanced binary tree
  # check that everything is visited at least, and at most once
  # similarly with the blocks of the file.
  # was thinking of moving this to Dirent.to_tree instead.
  class << @dirents
    # returns the dirents reachable from idx via prev/next links, in
    # prev, self, next order, after recursively attaching each one's children.
    def to_tree idx=0
      return [] if idx == Dirent::EOT
      d = self[idx]
      d.children = to_tree d.child
      # idx is only assigned on a visit, so an existing value means a second visit
      raise FormatError, "directory #{d.inspect} used twice" if d.idx
      d.idx = idx
      to_tree(d.prev) + [d] + to_tree(d.next)
    end
  end

  @root = @dirents.to_tree.first
  Log.warn "root name was #{@root.name.inspect}" unless @root.name == 'Root Entry'
  # dirents that never received an idx were not reached from the root
  unused = @dirents.reject(&:idx).length
  Log.warn "#{unused} unused directories" if unused > 0

  # FIXME i don't currently use @header.num_sbat which i should
  # hmm. nor do i write it. it means what exactly again?
  # which mode to use here?
  @sb_file = RangesIOResizeable.new @bbat, :first_block => @root.first_block, :size => @root.size
  @sbat = AllocationTable::Small.new self
  @sbat.load @bbat.read(@header.sbat_start)
end
# File lib/ole/storage/meta_data.rb, line 143
#
# Returns the MetaData instance for this storage, creating it on first use
# and reusing it afterwards.
def meta_data
  return @meta_data if @meta_data
  @meta_data = MetaData.new(self)
end
Could be useful with misbehaving ole documents, or just to clean them up.
# File lib/ole/storage/base.rb, line 306
#
# Repacks the storage through a temporary backing io, which is handed to
# repack_using_io. +temp+ selects the backing: :file (the default) uses a
# binmode Tempfile, :mem uses a StringIO.
#
# Raises ArgumentError for any other value of +temp+.
def repack temp=:file
  if :file == temp
    Tempfile.open('ole-repack') do |scratch|
      scratch.binmode
      repack_using_io scratch
    end
  elsif :mem == temp
    StringIO.open('') { |buffer| repack_using_io buffer }
  else
    raise ArgumentError, "unknown temp backing #{temp.inspect}"
  end
end