In Files

  • psych/lib/psych/parser.rb
  • psych/emitter.c

Parent

Namespace

Methods

Psych::Parser

YAML event parser class. This class parses a YAML document and calls events on the handler that is passed to the constructor. The events can be used for things such as constructing a YAML AST or deserializing YAML documents. It can even be fed back to Psych::Emitter to emit the same document that was parsed.

See Psych::Handler for documentation on the events that Psych::Parser emits.

Here is an example that prints out ever scalar found in a YAML document:

# Handler for detecting scalar values
class ScalarHandler < Psych::Handler
  def scalar value, anchor, tag, plain, quoted, style
    puts value
  end
end

parser = Psych::Parser.new(ScalarHandler.new)
parser.parse(yaml_document)

Here is an example that feeds the parser back in to Psych::Emitter. The YAML document is read from STDIN and written back out to STDERR:

parser = Psych::Parser.new(Psych::Emitter.new($stderr))
parser.parse($stdin)

Psych uses Psych::Parser in combination with Psych::TreeBuilder to construct an AST of the parsed YAML document.

Constants

ANY

Let the parser choose the encoding

UTF16BE

UTF-16-BE Encoding with BOM

UTF16LE

UTF-16-LE Encoding with BOM

UTF8

UTF-8 Encoding

Attributes

external_encoding[W]

Set the encoding for this parser to encoding

handler[RW]

The handler on which events will be called

Public Class Methods

new(handler = Handler.new) click to toggle source

Creates a new Psych::Parser instance with handler. YAML events will be called on handler. See Psych::Parser for more details.

 
               # File psych/lib/psych/parser.rb, line 46
def initialize handler = Handler.new
  @handler = handler
  @external_encoding = ANY
end
            

Public Instance Methods

mark # => # click to toggle source

Returns a Psych::Parser::Mark object that contains line, column, and index information.

 
               static VALUE mark(VALUE self)
{
    VALUE mark_klass;
    VALUE args[3];
    yaml_parser_t * parser;

    Data_Get_Struct(self, yaml_parser_t, parser);
    mark_klass = rb_const_get_at(cPsychParser, rb_intern("Mark"));
    args[0] = INT2NUM(parser->mark.index);
    args[1] = INT2NUM(parser->mark.line);
    args[2] = INT2NUM(parser->mark.column);

    return rb_class_new_instance(3, args, mark_klass);
}
            
parse(yaml) click to toggle source

Parse the YAML document contained in yaml. Events will be called on the handler set on the parser instance.

See Psych::Parser and #handler

 
               static VALUE parse(int argc, VALUE *argv, VALUE self)
{
    VALUE yaml, path;
    yaml_parser_t * parser;
    yaml_event_t event;
    int done = 0;
    int tainted = 0;
    int state = 0;
    int parser_encoding = YAML_ANY_ENCODING;
#ifdef HAVE_RUBY_ENCODING_H
    int encoding = rb_utf8_encindex();
    rb_encoding * internal_enc = rb_default_internal_encoding();
#endif
    VALUE handler = rb_iv_get(self, "@handler");

    if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) {
        if(rb_respond_to(yaml, id_path))
            path = rb_funcall(yaml, id_path, 0);
        else
            path = rb_str_new2("<unknown>");
    }

    Data_Get_Struct(self, yaml_parser_t, parser);

    yaml_parser_delete(parser);
    yaml_parser_initialize(parser);

    if (OBJ_TAINTED(yaml)) tainted = 1;

    if (rb_respond_to(yaml, id_read)) {
#ifdef HAVE_RUBY_ENCODING_H
        yaml = transcode_io(yaml, &parser_encoding);
        yaml_parser_set_encoding(parser, parser_encoding);
#endif
        yaml_parser_set_input(parser, io_reader, (void *)yaml);
        if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1;
    } else {
        StringValue(yaml);
#ifdef HAVE_RUBY_ENCODING_H
        yaml = transcode_string(yaml, &parser_encoding);
        yaml_parser_set_encoding(parser, parser_encoding);
#endif
        yaml_parser_set_input_string(
                parser,
                (const unsigned char *)RSTRING_PTR(yaml),
                (size_t)RSTRING_LEN(yaml)
                );
    }

    while(!done) {
        if(!yaml_parser_parse(parser, &event)) {
            VALUE exception;

            exception = make_exception(parser, path);
            yaml_parser_delete(parser);
            yaml_parser_initialize(parser);

            rb_exc_raise(exception);
        }

        switch(event.type) {
            case YAML_STREAM_START_EVENT:
              {
                  VALUE args[2];

                  args[0] = handler;
                  args[1] = INT2NUM((long)event.data.stream_start.encoding);
                  rb_protect(protected_start_stream, (VALUE)args, &state);
              }
              break;
          case YAML_DOCUMENT_START_EVENT:
            {
                VALUE args[4];
                /* Get a list of tag directives (if any) */
                VALUE tag_directives = rb_ary_new();
                /* Grab the document version */
                VALUE version = event.data.document_start.version_directive ?
                    rb_ary_new3(
                        (long)2,
                        INT2NUM((long)event.data.document_start.version_directive->major),
                        INT2NUM((long)event.data.document_start.version_directive->minor)
                        ) : rb_ary_new();

                if(event.data.document_start.tag_directives.start) {
                    yaml_tag_directive_t *start =
                        event.data.document_start.tag_directives.start;
                    yaml_tag_directive_t *end =
                        event.data.document_start.tag_directives.end;
                    for(; start != end; start++) {
                        VALUE handle = Qnil;
                        VALUE prefix = Qnil;
                        if(start->handle) {
                            handle = rb_str_new2((const char *)start->handle);
                            if (tainted) OBJ_TAINT(handle);
#ifdef HAVE_RUBY_ENCODING_H
                            PSYCH_TRANSCODE(handle, encoding, internal_enc);
#endif
                        }

                        if(start->prefix) {
                            prefix = rb_str_new2((const char *)start->prefix);
                            if (tainted) OBJ_TAINT(prefix);
#ifdef HAVE_RUBY_ENCODING_H
                            PSYCH_TRANSCODE(prefix, encoding, internal_enc);
#endif
                        }

                        rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
                    }
                }
                args[0] = handler;
                args[1] = version;
                args[2] = tag_directives;
                args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse;
                rb_protect(protected_start_document, (VALUE)args, &state);
            }
            break;
          case YAML_DOCUMENT_END_EVENT:
            {
                VALUE args[2];

                args[0] = handler;
                args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse;
                rb_protect(protected_end_document, (VALUE)args, &state);
            }
            break;
          case YAML_ALIAS_EVENT:
            {
                VALUE args[2];
                VALUE alias = Qnil;
                if(event.data.alias.anchor) {
                    alias = rb_str_new2((const char *)event.data.alias.anchor);
                    if (tainted) OBJ_TAINT(alias);
#ifdef HAVE_RUBY_ENCODING_H
                    PSYCH_TRANSCODE(alias, encoding, internal_enc);
#endif
                }

                args[0] = handler;
                args[1] = alias;
                rb_protect(protected_alias, (VALUE)args, &state);
            }
            break;
          case YAML_SCALAR_EVENT:
            {
                VALUE args[7];
                VALUE anchor = Qnil;
                VALUE tag = Qnil;
                VALUE plain_implicit, quoted_implicit, style;
                VALUE val = rb_str_new(
                    (const char *)event.data.scalar.value,
                    (long)event.data.scalar.length
                    );
                if (tainted) OBJ_TAINT(val);

#ifdef HAVE_RUBY_ENCODING_H
                PSYCH_TRANSCODE(val, encoding, internal_enc);
#endif

                if(event.data.scalar.anchor) {
                    anchor = rb_str_new2((const char *)event.data.scalar.anchor);
                    if (tainted) OBJ_TAINT(anchor);
#ifdef HAVE_RUBY_ENCODING_H
                    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
#endif
                }

                if(event.data.scalar.tag) {
                    tag = rb_str_new2((const char *)event.data.scalar.tag);
                    if (tainted) OBJ_TAINT(tag);
#ifdef HAVE_RUBY_ENCODING_H
                    PSYCH_TRANSCODE(tag, encoding, internal_enc);
#endif
                }

                plain_implicit =
                    event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue;

                quoted_implicit =
                    event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue;

                style = INT2NUM((long)event.data.scalar.style);

                args[0] = handler;
                args[1] = val;
                args[2] = anchor;
                args[3] = tag;
                args[4] = plain_implicit;
                args[5] = quoted_implicit;
                args[6] = style;
                rb_protect(protected_scalar, (VALUE)args, &state);
            }
            break;
          case YAML_SEQUENCE_START_EVENT:
            {
                VALUE args[5];
                VALUE anchor = Qnil;
                VALUE tag = Qnil;
                VALUE implicit, style;
                if(event.data.sequence_start.anchor) {
                    anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
                    if (tainted) OBJ_TAINT(anchor);
#ifdef HAVE_RUBY_ENCODING_H
                    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
#endif
                }

                tag = Qnil;
                if(event.data.sequence_start.tag) {
                    tag = rb_str_new2((const char *)event.data.sequence_start.tag);
                    if (tainted) OBJ_TAINT(tag);
#ifdef HAVE_RUBY_ENCODING_H
                    PSYCH_TRANSCODE(tag, encoding, internal_enc);
#endif
                }

                implicit =
                    event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue;

                style = INT2NUM((long)event.data.sequence_start.style);

                args[0] = handler;
                args[1] = anchor;
                args[2] = tag;
                args[3] = implicit;
                args[4] = style;

                rb_protect(protected_start_sequence, (VALUE)args, &state);
            }
            break;
          case YAML_SEQUENCE_END_EVENT:
            rb_protect(protected_end_sequence, handler, &state);
            break;
          case YAML_MAPPING_START_EVENT:
            {
                VALUE args[5];
                VALUE anchor = Qnil;
                VALUE tag = Qnil;
                VALUE implicit, style;
                if(event.data.mapping_start.anchor) {
                    anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
                    if (tainted) OBJ_TAINT(anchor);
#ifdef HAVE_RUBY_ENCODING_H
                    PSYCH_TRANSCODE(anchor, encoding, internal_enc);
#endif
                }

                if(event.data.mapping_start.tag) {
                    tag = rb_str_new2((const char *)event.data.mapping_start.tag);
                    if (tainted) OBJ_TAINT(tag);
#ifdef HAVE_RUBY_ENCODING_H
                    PSYCH_TRANSCODE(tag, encoding, internal_enc);
#endif
                }

                implicit =
                    event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue;

                style = INT2NUM((long)event.data.mapping_start.style);

                args[0] = handler;
                args[1] = anchor;
                args[2] = tag;
                args[3] = implicit;
                args[4] = style;

                rb_protect(protected_start_mapping, (VALUE)args, &state);
            }
            break;
          case YAML_MAPPING_END_EVENT:
            rb_protect(protected_end_mapping, handler, &state);
            break;
          case YAML_NO_EVENT:
            rb_protect(protected_empty, handler, &state);
            break;
          case YAML_STREAM_END_EVENT:
            rb_protect(protected_end_stream, handler, &state);
            done = 1;
            break;
        }
        yaml_event_delete(&event);
        if (state) rb_jump_tag(state);
    }

    return self;
}
            

Commenting is here to help enhance the documentation. For example, code samples, or clarification of the documentation.

If you have questions about Ruby or the documentation, please post to one of the Ruby mailing lists. You will get better, faster, help that way.

If you wish to post a correction of the docs, please do so, but also file bug report so that it can be corrected for the next release. Thank you.

If you want to help improve the Ruby documentation, please visit Documenting-ruby.org.

blog comments powered by Disqus