392 lines
10 KiB
C
392 lines
10 KiB
C
|
/* $Id$
|
||
|
|
||
|
Part of SWI-Prolog
|
||
|
|
||
|
Author: Jan Wielemaker
|
||
|
E-mail: wielemak@science.uva.nl
|
||
|
WWW: http://www.swi-prolog.org
|
||
|
Copyright (C): 1985-2005, University of Amsterdam
|
||
|
|
||
|
This library is free software; you can redistribute it and/or
|
||
|
modify it under the terms of the GNU Lesser General Public
|
||
|
License as published by the Free Software Foundation; either
|
||
|
version 2.1 of the License, or (at your option) any later version.
|
||
|
|
||
|
This library is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
Lesser General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU Lesser General Public
|
||
|
License along with this library; if not, write to the Free Software
|
||
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||
|
*/
|
||
|
|
||
|
#ifdef HAVE_CONFIG_H
|
||
|
#include <config.h>
|
||
|
#endif
|
||
|
|
||
|
#ifdef __WINDOWS__
|
||
|
#define HAVE_MALLOC_H 1
|
||
|
#endif
|
||
|
|
||
|
#include <SWI-Stream.h>
|
||
|
#include <SWI-Prolog.h>
|
||
|
#include <rfc2045.h>
|
||
|
#include "error.h"
|
||
|
#ifdef HAVE_MALLOC_H
|
||
|
#include <malloc.h>
|
||
|
#endif
|
||
|
#include <errno.h>
|
||
|
|
||
|
#undef max /* be sure we have ours */
|
||
|
#define max(x, y) ((x)>(y) ? (x) : (y))
|
||
|
|
||
|
|
||
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||
|
This module defines an interface to the rfc2045 (MIME) parsing library
|
||
|
by Double Precision, Inc, part of the maildrop system.
|
||
|
|
||
|
Parsing MIME messages is accomplished using a single predicate. This
|
||
|
predicate parses the input and returns a complex term holding the
|
||
|
various MIME message parts. The mime message is encoded into the
|
||
|
following structure:
|
||
|
|
||
|
mime(Attributes, Data, SubMimeList)
|
||
|
|
||
|
Where Data is the (decoded) field data returned as an atom, Attributes
|
||
|
is a property-list and SubMimeList is a list of mime/3 terms reflecting
|
||
|
the sub-parts. Attributes contains the following members:
|
||
|
|
||
|
# id(Atom)
|
||
|
# description(Atom)
|
||
|
# language(Atom)
|
||
|
# md5(Atom)
|
||
|
# type(Atom)
|
||
|
# character_set(Atom)
|
||
|
# transfer_encoding(Atom)
|
||
|
# disposition(Atom)
|
||
|
# filename(Atom)
|
||
|
# name(Atom)
|
||
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
||
|
|
||
|
static atom_t ATOM_;
|
||
|
static atom_t ATOM_stream;
|
||
|
static functor_t FUNCTOR_type1;
|
||
|
static functor_t FUNCTOR_transfer_encoding1;
|
||
|
static functor_t FUNCTOR_character_set1;
|
||
|
static functor_t FUNCTOR_mime3;
|
||
|
static functor_t FUNCTOR_id1;
|
||
|
static functor_t FUNCTOR_description1;
|
||
|
static functor_t FUNCTOR_language1;
|
||
|
static functor_t FUNCTOR_md51;
|
||
|
static functor_t FUNCTOR_disposition1;
|
||
|
static functor_t FUNCTOR_name1;
|
||
|
static functor_t FUNCTOR_filename1;
|
||
|
|
||
|
/* Growable byte buffer; add_data() appends decoded data to it. */
struct dbuf
{
  char *buf;		/* storage; NULL as long as nothing was allocated */
  int   size;		/* number of bytes currently stored in buf */
  int   allocated;	/* number of bytes buf can hold */
};
|
||
|
|
||
|
static int
|
||
|
add_data(const char *ndata, size_t len, void *closure)
|
||
|
{ struct dbuf *dbuf = closure;
|
||
|
|
||
|
if ( dbuf->size + (int)len > dbuf->allocated )
|
||
|
{ dbuf->allocated = max(dbuf->allocated, max(1024, dbuf->size + (int)len));
|
||
|
if ( dbuf->buf )
|
||
|
dbuf->buf = realloc(dbuf->buf, dbuf->allocated);
|
||
|
else
|
||
|
dbuf->buf = malloc(dbuf->allocated);
|
||
|
|
||
|
if ( !dbuf->buf )
|
||
|
{ pl_error("mime_parse", 3, NULL, ERR_ERRNO, errno, "add_data", "mime", 0);
|
||
|
return -1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
memcpy(dbuf->buf+dbuf->size, ndata, len);
|
||
|
dbuf->size += len;
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
static int
|
||
|
mime_unify_data(term_t data, struct rfc2045 *rfc, const char *buffer)
|
||
|
{ off_t start_pos, end_pos, start_body, nlines, nbodylines;
|
||
|
struct dbuf dbuf;
|
||
|
int rval;
|
||
|
|
||
|
dbuf.buf = NULL;
|
||
|
dbuf.size = 0;
|
||
|
dbuf.allocated = 0;
|
||
|
|
||
|
rfc2045_mimepos(rfc,
|
||
|
&start_pos, &end_pos, &start_body, &nlines, &nbodylines);
|
||
|
rfc2045_cdecode_start(rfc, add_data, &dbuf);
|
||
|
if ( rfc2045_cdecode(rfc, buffer+start_body, end_pos-start_body) == 0 &&
|
||
|
rfc2045_cdecode_end(rfc) == 0 )
|
||
|
{ rval = PL_unify_atom_nchars(data, dbuf.size, dbuf.buf);
|
||
|
} else
|
||
|
rval = FALSE;
|
||
|
|
||
|
if ( dbuf.buf )
|
||
|
free(dbuf.buf);
|
||
|
|
||
|
return rval;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* add_attribute() adds a name(value) term to the list if value is provided
|
||
|
(i.e. not NULL and non "")
|
||
|
*/
|
||
|
|
||
|
static int
|
||
|
add_attribute(term_t list, const char *value, functor_t functor)
|
||
|
{ if ( value && value[0] )
|
||
|
{ term_t h = PL_new_term_ref();
|
||
|
int rval;
|
||
|
|
||
|
rval = PL_unify_list(list, h, list) &&
|
||
|
PL_unify_term(h, PL_FUNCTOR, functor, PL_CHARS, value);
|
||
|
|
||
|
PL_reset_term_refs(h);
|
||
|
return rval;
|
||
|
}
|
||
|
|
||
|
return TRUE;
|
||
|
}
|
||
|
|
||
|
|
||
|
static int
|
||
|
mime_unify(term_t result, struct rfc2045 *rfc, const char *buffer)
|
||
|
{ term_t data = PL_new_term_ref();
|
||
|
term_t subs = PL_new_term_ref();
|
||
|
term_t atts = PL_new_term_ref();
|
||
|
|
||
|
if ( !PL_unify_term(result,
|
||
|
PL_FUNCTOR, FUNCTOR_mime3,
|
||
|
PL_TERM, atts,
|
||
|
PL_TERM, data,
|
||
|
PL_TERM, subs) )
|
||
|
return FALSE;
|
||
|
|
||
|
if ( rfc->isdummy )
|
||
|
{ if ( !PL_unify_nil(data) ||
|
||
|
!PL_unify_nil(atts) )
|
||
|
return FALSE;
|
||
|
} else
|
||
|
{ term_t at = PL_copy_term_ref(atts);
|
||
|
const char *type, *enc, *cset;
|
||
|
const char *disp, *name, *fnam;
|
||
|
|
||
|
const char *id = rfc2045_content_id(rfc);
|
||
|
const char *desc = rfc2045_content_description(rfc);
|
||
|
const char *lang = rfc2045_content_language(rfc);
|
||
|
const char *md5 = rfc2045_content_md5(rfc);
|
||
|
|
||
|
rfc2045_mimeinfo(rfc, &type, &enc, &cset);
|
||
|
rfc2045_dispositioninfo(rfc, &disp, &name, &fnam);
|
||
|
|
||
|
if ( !add_attribute(at, type, FUNCTOR_type1) ) return FALSE;
|
||
|
if ( !add_attribute(at, enc, FUNCTOR_transfer_encoding1) ) return FALSE;
|
||
|
if ( !add_attribute(at, cset, FUNCTOR_character_set1) ) return FALSE;
|
||
|
if ( !add_attribute(at, id, FUNCTOR_id1) ) return FALSE;
|
||
|
if ( !add_attribute(at, desc, FUNCTOR_description1) ) return FALSE;
|
||
|
if ( !add_attribute(at, lang, FUNCTOR_language1) ) return FALSE;
|
||
|
if ( !add_attribute(at, disp, FUNCTOR_disposition1) ) return FALSE;
|
||
|
if ( !add_attribute(at, name, FUNCTOR_name1) ) return FALSE;
|
||
|
if ( !add_attribute(at, fnam, FUNCTOR_filename1) ) return FALSE;
|
||
|
if ( !add_attribute(at, md5, FUNCTOR_md51) ) return FALSE;
|
||
|
|
||
|
if ( !PL_unify_nil(at) )
|
||
|
return FALSE;
|
||
|
}
|
||
|
|
||
|
if ( rfc->firstpart )
|
||
|
{ term_t st = PL_copy_term_ref(subs);
|
||
|
term_t s = PL_new_term_ref();
|
||
|
struct rfc2045 *sub;
|
||
|
|
||
|
if ( !PL_unify_atom(data, ATOM_) )
|
||
|
return FALSE;
|
||
|
|
||
|
for(sub=rfc->firstpart; sub; sub = sub->next)
|
||
|
{ if ( sub->isdummy )
|
||
|
continue;
|
||
|
|
||
|
if ( !PL_unify_list(st, s, st) ||
|
||
|
!mime_unify(s, sub, buffer) )
|
||
|
return FALSE;
|
||
|
}
|
||
|
return PL_unify_nil(st);
|
||
|
} else
|
||
|
{ if ( !PL_unify_nil(subs) ||
|
||
|
!mime_unify_data(data, rfc, buffer) )
|
||
|
return FALSE;
|
||
|
}
|
||
|
|
||
|
return TRUE;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||
|
get_character_data()
|
||
|
Get a buffer of data from a specification. Currently the following
|
||
|
specs are acceptable:
|
||
|
|
||
|
stream(Stream) All data from this stream
|
||
|
stream(Stream, N) At most N characters from stream
|
||
|
Atom, String, CodeList Data from native Prolog character data
|
||
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
||
|
|
||
|
static int
|
||
|
get_character_data(term_t from, char **data, size_t *len, int *malloced)
|
||
|
{ atom_t name;
|
||
|
int arity;
|
||
|
char *buf;
|
||
|
size_t size;
|
||
|
|
||
|
if ( PL_get_name_arity(from, &name, &arity) && arity > 0 )
|
||
|
{ if ( name == ATOM_stream )
|
||
|
{ IOSTREAM *stream;
|
||
|
term_t arg = PL_new_term_ref();
|
||
|
|
||
|
_PL_get_arg(1, from, arg);
|
||
|
if ( !PL_get_stream_handle(arg, &stream) )
|
||
|
return pl_error(NULL, 0, NULL, ERR_ARGTYPE, 1, from, "stream");
|
||
|
|
||
|
if ( arity == 1 ) /* stream(Stream) */
|
||
|
{ int c;
|
||
|
size_t done, allocated = 1024;
|
||
|
|
||
|
if ( !(buf = malloc(allocated)) )
|
||
|
return pl_error(NULL, 0, NULL, ERR_ERRNO, errno, "allocate", "memory", 0);
|
||
|
|
||
|
for( done=0; (c=Sgetcode(stream)) != EOF; )
|
||
|
{ if ( done >= allocated )
|
||
|
{ allocated *= 2;
|
||
|
|
||
|
if ( !(buf = realloc(buf, allocated)) )
|
||
|
return pl_error(NULL, 0, NULL, ERR_ERRNO, errno, "allocate", "memory", 0);
|
||
|
}
|
||
|
|
||
|
buf[done++] = c;
|
||
|
}
|
||
|
|
||
|
*len = done;
|
||
|
*data = buf;
|
||
|
*malloced = TRUE;
|
||
|
|
||
|
return TRUE;
|
||
|
} else if ( arity == 2 ) /* stream(Stream, Length) */
|
||
|
{ long size;
|
||
|
long done;
|
||
|
int c;
|
||
|
|
||
|
_PL_get_arg(2, from, arg);
|
||
|
if ( !PL_get_long(arg, &size) || size < 0 )
|
||
|
return pl_error(NULL, 0, NULL, ERR_ARGTYPE, 1, arg, "natural");
|
||
|
|
||
|
if ( !(buf = malloc(size)) )
|
||
|
return pl_error(NULL, 0, NULL, ERR_ERRNO, errno, "allocate", "memory", 0);
|
||
|
|
||
|
for( done=0; (c=Sgetcode(stream)) != EOF && done < size; )
|
||
|
buf[done++] = c;
|
||
|
|
||
|
*len = done;
|
||
|
*data = buf;
|
||
|
*malloced = TRUE;
|
||
|
|
||
|
return TRUE;
|
||
|
}
|
||
|
}
|
||
|
} else if ( PL_get_nchars(from, &size, data, CVT_ATOM|CVT_STRING|CVT_LIST) )
|
||
|
{ *len = size;
|
||
|
*malloced = FALSE;
|
||
|
|
||
|
return TRUE;
|
||
|
}
|
||
|
|
||
|
return pl_error(NULL, 0, NULL, ERR_ARGTYPE, 1, from, "data");
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
foreign_t
|
||
|
mime_parse(term_t handle, term_t result)
|
||
|
{ char *buf;
|
||
|
size_t len = 0;
|
||
|
int malloced = FALSE;
|
||
|
struct rfc2045 *rfc;
|
||
|
int rval;
|
||
|
|
||
|
if ( !get_character_data(handle, &buf, &len, &malloced) )
|
||
|
return FALSE;
|
||
|
|
||
|
rfc = rfc2045_alloc();
|
||
|
rfc2045_parse(rfc, buf, len);
|
||
|
rval = mime_unify(result, rfc, buf);
|
||
|
|
||
|
if ( malloced )
|
||
|
free(buf);
|
||
|
rfc2045_free(rfc);
|
||
|
|
||
|
return rval;
|
||
|
}
|
||
|
|
||
|
/*******************************
|
||
|
* ERRORS *
|
||
|
*******************************/
|
||
|
|
||
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||
|
Not particularly elegant, but the documentation wishes us to call exit(),
|
||
|
which is even worse.
|
||
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
||
|
|
||
|
void
|
||
|
rfc2045_error(const char *errmsg)
|
||
|
{ term_t e = PL_new_term_ref();
|
||
|
|
||
|
if ( (e=PL_new_term_ref()) &&
|
||
|
PL_unify_term(e,
|
||
|
PL_FUNCTOR_CHARS, "error", 2,
|
||
|
PL_FUNCTOR_CHARS, "mime", 1,
|
||
|
PL_CHARS, errmsg,
|
||
|
PL_VARIABLE) )
|
||
|
PL_throw(e);
|
||
|
|
||
|
PL_fatal_error("Could not recover from rfc2045 error");
|
||
|
}
|
||
|
|
||
|
/*******************************
|
||
|
* INSTALL *
|
||
|
*******************************/
|
||
|
|
||
|
#define mkfunctor(n, a) PL_new_functor(PL_new_atom(n), a)
|
||
|
|
||
|
|
||
|
install_t
|
||
|
install_mime()
|
||
|
{ ATOM_ = PL_new_atom("");
|
||
|
ATOM_stream = PL_new_atom("stream");
|
||
|
|
||
|
FUNCTOR_type1 = mkfunctor("type", 1);
|
||
|
FUNCTOR_transfer_encoding1 = mkfunctor("transfer_encoding", 1);
|
||
|
FUNCTOR_character_set1 = mkfunctor("character_set", 1);
|
||
|
FUNCTOR_mime3 = mkfunctor("mime", 3);
|
||
|
FUNCTOR_id1 = mkfunctor("id", 1);
|
||
|
FUNCTOR_description1 = mkfunctor("description", 1);
|
||
|
FUNCTOR_language1 = mkfunctor("language", 1);
|
||
|
FUNCTOR_md51 = mkfunctor("md5", 1);
|
||
|
FUNCTOR_disposition1 = mkfunctor("disposition", 1);
|
||
|
FUNCTOR_name1 = mkfunctor("name", 1);
|
||
|
FUNCTOR_filename1 = mkfunctor("filename", 1);
|
||
|
|
||
|
PL_register_foreign("mime_parse", 2, mime_parse, 0);
|
||
|
}
|