123456789_123456789_123456789_123456789_123456789_

Class: Zlib::GzipReader

Relationships & Source Files
Super Chains via Extension / Inclusion / Inheritance
Class Chain:
self, GzipFile
Instance Chain:
self, Enumerable, GzipFile
Inherits: Zlib::GzipFile
Defined in: ext/zlib/zlib.c,
ext/zlib/zlib.c

Overview

GzipReader is the class for reading a gzipped file. GzipReader should be used with an IO, or IO-like, object.

# Block form: open the gzipped file by name; the reader is passed to the block.
Zlib::GzipReader.open('hoge.gz') {|gz|
  print gz.read
}

# Manual form: wrap an already-open File in a reader and close the reader
# explicitly once the data has been read.
File.open('hoge.gz') do |f|
  gz = Zlib::GzipReader.new(f)
  print gz.read
  gz.close
end

Method Catalogue

The following methods in GzipReader are just like their counterparts in IO, but they raise a Zlib::Error or Zlib::GzipFile::Error exception if an error is found in the gzip file.

Be careful of the footer of the gzip file. A gzip file has the checksum of the pre-compressed data in its footer. GzipReader checks all uncompressed data against that checksum in the following cases, and if the check fails, raises a GzipFile::NoFooter, GzipFile::CRCError, or GzipFile::LengthError exception.

  • When a read request is received beyond the end of file (the end of compressed data). That is, when Zlib::GzipReader#read, Zlib::GzipReader#gets, or some other reading method returns nil.

  • When GzipFile#close method is called after the object reaches the end of file.

  • When #unused method is called after the object reaches the end of file.

The rest of the methods are adequately described in their own documentation.

Class Method Summary

GzipFile - Inherited

.wrap

Creates a GzipReader or GzipWriter associated with io, passing in any necessary extra options, and executes the block with the newly created object just like File.open.

Instance Attribute Summary

  • #eof (also: #eof?) readonly

    Returns true if the stream has reached the end, false otherwise.

  • #eof? readonly

    Alias for #eof.

  • #lineno rw

    The line number of the last row read from this file.

  • #lineno=(lineno) rw

    Specify line number of the last row read from this file.

GzipFile - Inherited

#closed?

Same as IO#closed?

#sync

Same as IO#sync

#sync=

Same as IO#sync=

Instance Method Summary

GzipFile - Inherited

#close

Closes the GzipFile object.

#comment

Returns the comment recorded in the gzip file header, or nil if no comment is present.

#crc

Returns CRC value of the uncompressed data.

#finish

Closes the GzipFile object.

#level

Returns compression level.

#mtime

Returns last modification time recorded in the gzip file header.

#orig_name

Returns original filename recorded in the gzip file header, or nil if original filename is not present.

#os_code

Returns OS code number recorded in the gzip file header.

#to_io

Same as IO#to_io

Constructor Details

.new(io, options = {})

Creates a GzipReader object associated with io. The GzipReader object reads gzipped data from io, and parses/decompresses it. The io must have a #read method that behaves the same as IO#read.

The options hash may be used to set the encoding of the data. :external_encoding, :internal_encoding and :encoding may be set as in IO.new.

If the gzip file header is incorrect, raises a GzipFile::Error exception.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 3949

/*
 * Zlib::GzipReader#initialize(io, options = {})
 *
 * Binds the reader to +io+, prepares the inflate stream, reads the gzip
 * header and applies the encoding options.  Returns +obj+.
 */
static VALUE
rb_gzreader_initialize(int argc, VALUE *argv, VALUE obj)
{
    struct gzfile *gz;
    VALUE io;
    VALUE opt = Qnil;
    int status;

    TypedData_Get_Struct(obj, struct gzfile, &gzfile_data_type, gz);

    /* One mandatory positional argument plus an optional keyword hash. */
    rb_scan_args(argc, argv, "1:", &io, &opt);

    /*
     * A negative window size is passed to inflateInit2 here; the gzip header
     * itself is handled separately by gzfile_read_header() below.
     */
    status = inflateInit2(&gz->z.stream, -MAX_WBITS);
    if (status != Z_OK) {
        raise_zlib_error(status, gz->z.stream.msg);
    }

    gz->io = io;
    ZSTREAM_READY(&gz->z);
    gzfile_read_header(gz, Qnil);
    rb_gzfile_ecopts(gz, opt);

    /*
     * Only attempt to record a path when io advertises one.  File#path may
     * raise IOError when no path is available, so that error is rescued and
     * ignored.
     */
    if (rb_respond_to(io, id_path)) {
        rb_rescue2(gzfile_initialize_path_partial, obj,
                   NULL, Qnil,
                   rb_eIOError, (VALUE)0);
    }

    return obj;
}

Class Method Details

.open(filename) {|gz| ... }

Opens a file specified by filename as a gzipped file, and returns a GzipReader object associated with that file. Further details of this method are in .new and Zlib::GzipFile.wrap.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 3872

/*
 * Zlib::GzipReader.open(filename) {|gz| ... }
 *
 * Delegates to gzfile_s_open(), requesting binary read mode for the file.
 */
static VALUE
rb_gzreader_s_open(int argc, VALUE *argv, VALUE klass)
{
    const char *const read_binary_mode = "rb";

    return gzfile_s_open(argc, argv, klass, read_binary_mode);
}

.zcat(io, options = {}, &block) ⇒ nil .zcat(io, options = {}) ⇒ String

Decompresses all gzip data in the io, handling multiple gzip streams until the end of the io. There should not be any non-gzip data after the gzip streams.

If a block is given, it is yielded strings of uncompressed data, and the method returns nil. If a block is not given, the method returns the concatenation of all uncompressed data in all gzip streams.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 3894

/*
 * Zlib::GzipReader.zcat(io, options = {}) [{|str| ... }]
 *
 * Decompresses every gzip stream found back-to-back in +io+.  A fresh reader
 * is created with klass.new(*argv) for each stream.  With a block, the lines
 * of each stream are yielded and nil is returned; without a block, the
 * decompressed data of all streams is concatenated into one String, which is
 * returned.
 *
 * +io+ is sent the messages pos, pos= and size, so it must respond to them.
 */
static VALUE
rb_gzreader_s_zcat(int argc, VALUE *argv, VALUE klass)
{
    /* buf stays 0 until the first non-block iteration allocates it. */
    VALUE io, unused, obj, buf=0, tmpbuf;
    long pos;

    rb_check_arity(argc, 1, 2);
    io = argv[0];

    do {
        /* One reader per gzip stream; all caller arguments are forwarded. */
        obj = rb_funcallv(klass, rb_intern("new"), argc, argv);
        if (rb_block_given_p()) {
           rb_gzreader_each(0, 0, obj);
        }
        else {
            if (!buf) {
                buf = rb_str_new(0, 0);
            }
            tmpbuf = gzfile_read_all(get_gzfile(obj));
            rb_str_cat(buf, RSTRING_PTR(tmpbuf), RSTRING_LEN(tmpbuf));
        }

        /*
         * Issue one more read once the stream's data is exhausted.
         * NOTE(review): presumably this makes sure the footer has been
         * consumed so that #unused reports the leftover bytes -- confirm
         * against gzfile_read_all().
         */
        rb_gzreader_read(0, 0, obj);
        pos = NUM2LONG(rb_funcall(io, rb_intern("pos"), 0));
        /* Fetch the leftover bytes before the reader is finished. */
        unused = rb_gzreader_unused(obj);
        rb_gzfile_finish(obj);
        if (!NIL_P(unused)) {
            /*
             * Bytes in +unused+ were taken from io but do not belong to the
             * stream just decoded: move io back by that many bytes so the
             * next iteration starts reading there.
             */
            pos -= NUM2LONG(rb_funcall(unused, rb_intern("length"), 0));
            rb_funcall(io, rb_intern("pos="), 1, LONG2NUM(pos));
        }
        /* Keep going while io still has data past the current position. */
    } while (pos < NUM2LONG(rb_funcall(io, rb_intern("size"), 0)));

    if (rb_block_given_p()) {
        return Qnil;
    }
    return buf;
}

Instance Attribute Details

#eof (readonly) Also known as: #eof?

Returns true if the stream has reached the end, false otherwise.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 3499

/*
 * Zlib::GzipReader#eof / #eof?
 *
 * Pulls more input until either the zstream reports it is finished or some
 * decompressed data is buffered, then reports whether the gzip file as a
 * whole is finished.
 */
static VALUE
rb_gzfile_eof_p(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);

    for (;;) {
        if (ZSTREAM_IS_FINISHED(&gz->z)) {
            break;
        }
        if (ZSTREAM_BUF_FILLED(&gz->z) != 0) {
            break;
        }
        gzfile_read_more(gz, Qnil);
    }

    if (GZFILE_IS_FINISHED(gz)) {
        return Qtrue;
    }
    return Qfalse;
}

#eof? (readonly)

Alias for #eof.

#lineno (rw)

The line number of the last row read from this file.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 3338

/*
 * Zlib::GzipReader#lineno
 *
 * Returns the reader's stored line counter as a Ruby Integer.
 */
static VALUE
rb_gzfile_lineno(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);

    return INT2NUM(gz->lineno);
}

#lineno=(lineno) (rw)

Specify line number of the last row read from this file.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 3349

/*
 * Zlib::GzipReader#lineno=(lineno)
 *
 * Stores +lineno+ (converted to a C int) as the reader's line counter and
 * returns the argument unchanged.
 */
static VALUE
rb_gzfile_set_lineno(VALUE obj, VALUE lineno)
{
    /* Resolve the reader first, then convert the argument. */
    struct gzfile *gz = get_gzfile(obj);
    int new_lineno = NUM2INT(lineno);

    gz->lineno = new_lineno;
    return lineno;
}

Instance Method Details

#each(*args) Also known as: #each_line

See GzipReader documentation for a description.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 4419

/*
 * Zlib::GzipReader#each(*args) / #each_line(*args)
 *
 * Yields each line obtained from gzreader_gets() to the block until it
 * returns nil, then returns the reader itself.  +argc+/+argv+ are passed
 * through unchanged to gzreader_gets() for every line.
 *
 * When no block is given an enumerator is returned instead.  The caller's
 * arguments are forwarded to that enumerator so iterating it later reads
 * lines with the same arguments; passing (0, 0) here would silently drop
 * them and make the enumerator behave as if none had been given.
 */
static VALUE
rb_gzreader_each(int argc, VALUE *argv, VALUE obj)
{
    VALUE line;

    RETURN_ENUMERATOR(obj, argc, argv);

    while (!NIL_P(line = gzreader_gets(argc, argv, obj))) {
        rb_yield(line);
    }
    return obj;
}

#each_byte

See GzipReader documentation for a description.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 4145

/*
 * Zlib::GzipReader#each_byte
 *
 * Yields every byte returned by #getbyte until it returns nil.  Returns an
 * enumerator when called without a block, nil otherwise.
 */
static VALUE
rb_gzreader_each_byte(VALUE obj)
{
    RETURN_ENUMERATOR(obj, 0, 0);

    for (;;) {
        VALUE byte = rb_gzreader_getbyte(obj);

        if (NIL_P(byte)) {
            break;
        }
        rb_yield(byte);
    }
    return Qnil;
}

#each_char

See GzipReader documentation for a description.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 4127

/*
 * Zlib::GzipReader#each_char
 *
 * Yields every character returned by #getc until it returns nil.  Returns an
 * enumerator when called without a block, nil otherwise.
 */
static VALUE
rb_gzreader_each_char(VALUE obj)
{
    RETURN_ENUMERATOR(obj, 0, 0);

    for (;;) {
        VALUE ch = rb_gzreader_getc(obj);

        if (NIL_P(ch)) {
            break;
        }
        rb_yield(ch);
    }
    return Qnil;
}

#each_line(*args)

Alias for #each.

#external_encoding

See GzipReader documentation for a description.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 4453

/*
 * Zlib::GzipReader#external_encoding
 *
 * Wraps the reader's stored encoding (gz->enc) in a Ruby Encoding object.
 */
static VALUE
rb_gzreader_external_encoding(VALUE self)
{
    struct gzfile *gz = get_gzfile(self);

    return rb_enc_from_encoding(gz->enc);
}

#getbyte

See GzipReader documentation for a description.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 4093

/*
 * Zlib::GzipReader#getbyte
 *
 * Reads a single byte through gzfile_read().  Returns it as an Integer in
 * the range 0..255, or passes through nil when gzfile_read() returns nil.
 */
static VALUE
rb_gzreader_getbyte(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    VALUE chunk = gzfile_read(gz, 1);

    if (NIL_P(chunk)) {
        return chunk;
    }
    /* Mask to 8 bits so a signed char never yields a negative value. */
    return INT2FIX((unsigned int)(RSTRING_PTR(chunk)[0]) & 0xff);
}

#getc

See GzipReader documentation for a description.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 4064

/*
 * Zlib::GzipReader#getc
 *
 * Thin wrapper: resolves the reader and returns whatever gzfile_getc()
 * produces for it.
 */
static VALUE
rb_gzreader_getc(VALUE obj)
{
    return gzfile_getc(get_gzfile(obj));
}

#gets(*args)

See GzipReader documentation for a description. However, note that this method can return nil even if #eof? returns false, unlike the behavior of File#gets.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 4387

/*
 * Zlib::GzipReader#gets(*args)
 *
 * Fetches the next line via gzreader_gets().  A non-nil line is also
 * recorded with rb_lastline_set() before being returned; nil is returned
 * as is.
 */
static VALUE
rb_gzreader_gets(int argc, VALUE *argv, VALUE obj)
{
    VALUE line = gzreader_gets(argc, argv, obj);

    if (NIL_P(line)) {
        return line;
    }
    rb_lastline_set(line);
    return line;
}

#pos Also known as: #tell

Total number of bytes output so far.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 3560

/*
 * Zlib::GzipReader#pos / #tell
 *
 * Returns the stream's total_out counter minus the number of bytes still
 * sitting in the output buffer.  When the buffer holds more bytes than
 * total_out, the (negative) difference is returned instead.
 */
static VALUE
rb_gzfile_total_out(VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    uLong produced = gz->z.stream.total_out;
    long pending = ZSTREAM_BUF_FILLED(&gz->z);

    if (produced < (uLong)pending) {
        /* More buffered than produced: report a negative offset. */
        return LONG2FIX(-(pending - (long)produced));
    }
    return rb_uint2inum(produced - pending);
}

#read(*args)

See GzipReader documentation for a description.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 4010

/*
 * Zlib::GzipReader#read([length])
 *
 * Without a length (or with nil) returns the result of gzfile_read_all().
 * Otherwise reads up to +length+ bytes through gzfile_read().
 * Raises ArgumentError when +length+ is negative.
 */
static VALUE
rb_gzreader_read(int argc, VALUE *argv, VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    VALUE requested;
    long nbytes;

    rb_scan_args(argc, argv, "01", &requested);

    if (!NIL_P(requested)) {
        nbytes = NUM2INT(requested);
        if (nbytes < 0) {
            rb_raise(rb_eArgError, "negative length %ld given", nbytes);
        }
        return gzfile_read(gz, nbytes);
    }

    return gzfile_read_all(gz);
}

#readbyte

See GzipReader documentation for a description.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 4111

/*
 * Zlib::GzipReader#readbyte
 *
 * Same as #getbyte, except that a nil result is turned into an EOFError.
 */
static VALUE
rb_gzreader_readbyte(VALUE obj)
{
    VALUE byte = rb_gzreader_getbyte(obj);

    if (NIL_P(byte)) {
        rb_raise(rb_eEOFError, "end of file reached");
    }

    return byte;
}

#readchar

See GzipReader documentation for a description.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 4077

/*
 * Zlib::GzipReader#readchar
 *
 * Same as #getc, except that a nil result is turned into an EOFError.
 */
static VALUE
rb_gzreader_readchar(VALUE obj)
{
    VALUE ch = rb_gzreader_getc(obj);

    if (NIL_P(ch)) {
        rb_raise(rb_eEOFError, "end of file reached");
    }

    return ch;
}

#readline(*args)

See GzipReader documentation for a description.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 4403

/*
 * Zlib::GzipReader#readline(*args)
 *
 * Same as #gets (arguments are forwarded unchanged), except that a nil
 * result is turned into an EOFError.
 */
static VALUE
rb_gzreader_readline(int argc, VALUE *argv, VALUE obj)
{
    VALUE line = rb_gzreader_gets(argc, argv, obj);

    if (NIL_P(line)) {
        rb_raise(rb_eEOFError, "end of file reached");
    }

    return line;
}

#readlines(*args)

See GzipReader documentation for a description.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 4437

/*
 * Zlib::GzipReader#readlines(*args)
 *
 * Collects every line produced by gzreader_gets() (arguments are forwarded
 * unchanged) into a new Array, stopping at the first nil, and returns it.
 */
static VALUE
rb_gzreader_readlines(int argc, VALUE *argv, VALUE obj)
{
    VALUE lines = rb_ary_new();

    for (;;) {
        VALUE line = gzreader_gets(argc, argv, obj);

        if (NIL_P(line)) {
            break;
        }
        rb_ary_push(lines, line);
    }
    return lines;
}

#readpartial(maxlen [, outbuf]) ⇒ String, outbuf

Reads at most maxlen bytes from the gzipped stream, but blocks only if the GzipReader has no data immediately available. If the optional outbuf argument is present, it must reference a String, which will receive the data. It raises EOFError on end of file.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 4041

/*
 * Zlib::GzipReader#readpartial(maxlen [, outbuf])
 *
 * Validates the arguments and hands them to gzfile_readpartial().
 * Raises ArgumentError when +maxlen+ is negative; a non-nil +outbuf+ must
 * be a String (enforced with Check_Type).
 */
static VALUE
rb_gzreader_readpartial(int argc, VALUE *argv, VALUE obj)
{
    struct gzfile *gz = get_gzfile(obj);
    VALUE maxlen_arg;
    VALUE outbuf;
    long maxlen;

    /* One required argument followed by one optional argument. */
    rb_scan_args(argc, argv, "11", &maxlen_arg, &outbuf);

    maxlen = NUM2INT(maxlen_arg);
    if (maxlen < 0) {
        rb_raise(rb_eArgError, "negative length %ld given", maxlen);
    }

    if (!NIL_P(outbuf)) {
        Check_Type(outbuf, T_STRING);
    }

    return gzfile_readpartial(gz, maxlen, outbuf);
}

#rewind

Resets the position of the file pointer to the point at which the GzipReader object was created. The associated IO object needs to respond to the seek method.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 3983

/*
 * Zlib::GzipReader#rewind
 *
 * Delegates to gzfile_reader_rewind() and always returns 0.
 */
static VALUE
rb_gzreader_rewind(VALUE obj)
{
    gzfile_reader_rewind(get_gzfile(obj));

    return INT2FIX(0);
}

#tell

Alias for #pos.

#ungetbyte(ch)

See GzipReader documentation for a description.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 4185

/*
 * Zlib::GzipReader#ungetbyte(ch)
 *
 * Converts +ch+ with NUM2CHR and pushes the resulting byte back through
 * gzfile_ungetbyte().  Always returns nil.
 */
static VALUE
rb_gzreader_ungetbyte(VALUE obj, VALUE ch)
{
    /* Resolve the reader first, then convert the argument. */
    struct gzfile *gz = get_gzfile(obj);
    char byte = NUM2CHR(ch);

    gzfile_ungetbyte(gz, byte);
    return Qnil;
}

#ungetc(s)

See GzipReader documentation for a description.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 4163

/*
 * Zlib::GzipReader#ungetc(s)
 *
 * A Fixnum argument is handled as a single byte via #ungetbyte.  Anything
 * else is coerced to a String, converted to gz->enc2 when that encoding is
 * set and is not ASCII-8BIT, and pushed back through gzfile_ungets().
 * Always returns nil on the String path.
 */
static VALUE
rb_gzreader_ungetc(VALUE obj, VALUE s)
{
    struct gzfile *gz;
    const Bytef *bytes;
    long nbytes;

    if (FIXNUM_P(s)) {
        return rb_gzreader_ungetbyte(obj, s);
    }

    gz = get_gzfile(obj);
    StringValue(s);

    if (gz->enc2) {
        if (gz->enc2 != rb_ascii8bit_encoding()) {
            s = rb_str_conv_enc(s, rb_enc_get(s), gz->enc2);
        }
    }

    bytes = (const Bytef*)RSTRING_PTR(s);
    nbytes = RSTRING_LEN(s);
    gzfile_ungets(gz, bytes, nbytes);

    /* Keep s alive while its raw buffer is in use above. */
    RB_GC_GUARD(s);
    return Qnil;
}

#unused

Returns the remaining data that was read while parsing the gzip format, or nil if the whole gzip file has not been parsed yet.

[ GitHub ]

  
# File 'ext/zlib/zlib.c', line 3997

/*
 * Zlib::GzipReader#unused
 *
 * Returns the result of gzfile_reader_get_unused() for this reader.  The
 * struct is fetched directly with TypedData_Get_Struct rather than through
 * get_gzfile().
 */
static VALUE
rb_gzreader_unused(VALUE obj)
{
    struct gzfile *gz;
    VALUE leftover;

    TypedData_Get_Struct(obj, struct gzfile, &gzfile_data_type, gz);
    leftover = gzfile_reader_get_unused(gz);
    return leftover;
}