Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #75 allow Roo::Excelx to open streams #209

Merged
merged 1 commit into from
May 7, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions lib/roo/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,10 @@ def key_to_string(arr)
"#{arr[0]},#{arr[1]}"
end

def is_stream?(filename_or_stream)
filename_or_stream.respond_to?(:seek)
end

private

def clean_sheet_if_need(options)
Expand All @@ -451,6 +455,7 @@ def search_or_set_header(options)
end

def local_filename(filename, tmpdir, packed)
return if is_stream?(filename)
filename = download_uri(filename, tmpdir) if uri?(filename)
filename = unzip(filename, tmpdir) if packed == :zip
unless File.file?(filename)
Expand Down
47 changes: 33 additions & 14 deletions lib/roo/excelx.rb
Original file line number Diff line number Diff line change
Expand Up @@ -250,20 +250,23 @@ def pad_cells(cell, last_column)
# values for packed: :zip
# optional cell_max (int) parameter for early aborting attempts to parse
# enormous documents.
def initialize(filename, options = {})
def initialize(filename_or_stream, options = {})
packed = options[:packed]
file_warning = options.fetch(:file_warning, :error)
cell_max = options.delete(:cell_max)
sheet_options = {}
sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)

file_type_check(filename,'.xlsx','an Excel-xlsx', file_warning, packed)
unless is_stream?(filename_or_stream)
file_type_check(filename_or_stream,'.xlsx','an Excel-xlsx', file_warning, packed)
basename = File.basename(filename_or_stream)
end

@tmpdir = make_tmpdir(File.basename(filename), options[:tmpdir_root])
@filename = local_filename(filename, @tmpdir, packed)
@tmpdir = make_tmpdir(basename, options[:tmpdir_root])
@filename = local_filename(filename_or_stream, @tmpdir, packed)
@comments_files = []
@rels_files = []
process_zipfile(@tmpdir, @filename)
process_zipfile(@filename || filename_or_stream)

@sheet_names = workbook.sheets.map do |sheet|
unless options[:only_visible_sheets] && sheet['state'] == 'hidden'
Expand Down Expand Up @@ -574,10 +577,26 @@ def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
end

# Extracts all needed files from the zip file
def process_zipfile(tmpdir, zipfilename)
def process_zipfile(zipfilename_or_stream)
@sheet_files = []
entries = Zip::File.open(zipfilename).to_a.sort_by(&:name)

unless is_stream?(zipfilename_or_stream)
process_zipfile_entries Zip::File.open(zipfilename_or_stream).to_a.sort_by(&:name)
else
stream = Zip::InputStream.open zipfilename_or_stream
begin
entries = []
while entry = stream.get_next_entry
entries << entry
end
process_zipfile_entries entries
ensure
stream.close
end
end
end

def process_zipfile_entries entries
# NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames
# are not in order. With Numbers 3.1, the first sheet is always
# sheet.xml, not sheet1.xml. With Google, the order of the worksheets is
Expand All @@ -593,31 +612,31 @@ def process_zipfile(tmpdir, zipfilename)
# workbook.xml.rel:
# <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
# <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
sheet_ids = extract_worksheet_ids(entries, "#{tmpdir}/roo_workbook.xml")
sheets = extract_worksheet_rels(entries, "#{tmpdir}/roo_workbook.xml.rels")
extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)

entries.each do |entry|
path =
case entry.name.downcase
when /sharedstrings.xml$/
"#{tmpdir}/roo_sharedStrings.xml"
"#{@tmpdir}/roo_sharedStrings.xml"
when /styles.xml$/
"#{tmpdir}/roo_styles.xml"
"#{@tmpdir}/roo_styles.xml"
when /comments([0-9]+).xml$/
# FIXME: Most of the time, The order of the comment files are the same
# the sheet order, i.e. sheet1.xml's comments are in comments1.xml.
# In some situations, this isn't true. The true location of a
# sheet's comment file is in the sheet1.xml.rels file. SEE
# ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
nr = Regexp.last_match[1].to_i
@comments_files[nr - 1] = "#{tmpdir}/roo_comments#{nr}"
@comments_files[nr - 1] = "#{@tmpdir}/roo_comments#{nr}"
when /sheet([0-9]+).xml.rels$/
# FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
# it also stores the location for sharedStrings, comments,
# drawings, etc.
nr = Regexp.last_match[1].to_i
@rels_files[nr - 1] = "#{tmpdir}/roo_rels#{nr}"
@rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}"
end

entry.extract(path) if path
Expand Down
9 changes: 8 additions & 1 deletion test/test_roo.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#STDERR.reopen "/dev/null","w"

require 'test_helper'
require 'stringio'

class TestRoo < Minitest::Test

Expand Down Expand Up @@ -2063,5 +2064,11 @@ def test_noexpand_merged_range
end
end


def test_open_stream
return unless EXCELX
file_contents = File.read File.join(TESTDIR, fixture_filename(:numbers1, :excelx))
stream = StringIO.new(file_contents)
xlsx = Roo::Excelx.new(stream)
assert_equal ["Tabelle1","Name of Sheet 2","Sheet3","Sheet4","Sheet5"], xlsx.sheets
end
end # class