/* Repository-viewer residue (not part of the original mime.c):
   This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
   yap-6.3/packages/clib/mime.c
   2011-03-10 11:05:53 +00:00

   392 lines
   10 KiB
   C
*/

/* $Id$
Part of SWI-Prolog
Author: Jan Wielemaker
E-mail: wielemak@science.uva.nl
WWW: http://www.swi-prolog.org
Copyright (C): 1985-2005, University of Amsterdam
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef __WINDOWS__
#define HAVE_MALLOC_H 1
#endif
#include <SWI-Stream.h>
#include <SWI-Prolog.h>
#include <rfc2045.h>
#include "error.h"
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#include <errno.h>
/* Drop any max() a previously included header may have defined and
   install our own.  This is a function-like macro: whichever argument
   is selected is evaluated twice, so only pass expressions without
   side effects. */
#undef max /* be sure we have ours */
#define max(x, y) ((x)>(y) ? (x) : (y))
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
This module defines an interface to the rfc2045 (MIME) parsing library
by Double Precision, Inc, part of the maildrop system.
Parsing MIME messages is accomplished using a single predicate. This
predicate parses the input and returns a complex term holding the
various MIME message parts. The mime message is encoded into the
following structure:
mime(Attributes, Data, SubMimeList)
Where Data is the (decoded) field data returned as an atom, Attributes
is a property-list and SubMimeList is a list of mime/3 terms reflecting
the sub-parts. Attributes contains the following members:
# id(Atom)
# description(Atom)
# language(Atom)
# md5(Atom)
# type(Atom)
# character_set(Atom)
# transfer_encoding(Atom)
# disposition(Atom)
# filename(Atom)
# name(Atom)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* Atom and functor handles used when building and inspecting terms.
   All of them are assigned once in install_mime(). */

/* The empty atom ''; unified with the Data field of multipart nodes */
static atom_t ATOM_;
/* The atom `stream', name of the stream(...) input specifications */
static atom_t ATOM_stream;
/* Functors for the name(Value) attribute terms and for mime/3 itself */
static functor_t FUNCTOR_type1;
static functor_t FUNCTOR_transfer_encoding1;
static functor_t FUNCTOR_character_set1;
static functor_t FUNCTOR_mime3;
static functor_t FUNCTOR_id1;
static functor_t FUNCTOR_description1;
static functor_t FUNCTOR_language1;
static functor_t FUNCTOR_md51;
static functor_t FUNCTOR_disposition1;
static functor_t FUNCTOR_name1;
static functor_t FUNCTOR_filename1;
/* Growable byte buffer that collects decoded body data; it is filled by
   add_data() and owned (initialised and freed) by mime_unify_data(). */
struct dbuf
/* buf: start of the collected data, NULL while nothing was allocated */
{ char *buf;
/* size: number of bytes of buf currently in use */
size_t size;
/* allocated: number of bytes reserved for buf */
size_t allocated;
};
/* add_data() is the output callback registered with
   rfc2045_cdecode_start(): it appends the `len' bytes at `ndata' to the
   struct dbuf passed as `closure', growing the buffer when needed.

   Returns 0 on success.  If the buffer cannot be enlarged a Prolog error
   is raised through pl_error() and -1 is returned; the data collected so
   far stays valid in dbuf->buf so the owner can still free it.
*/

static int
add_data(const char *ndata, size_t len, void *closure)
{ struct dbuf *dbuf = closure;
  size_t needed;

  if ( len == 0 )			/* nothing to add; also avoids */
    return 0;				/* memcpy() on a NULL buffer */

  if ( len > (size_t)-1 - dbuf->size )	/* size+len would wrap around */
  { pl_error("mime_parse", 3, NULL, ERR_ERRNO, ENOMEM, "add_data", "mime", 0);
    return -1;
  }
  needed = dbuf->size + len;

  if ( needed > dbuf->allocated )
  { size_t newsize = dbuf->allocated ? dbuf->allocated : 1024;
    char *newbuf;

    /* Grow geometrically so that many small chunks do not cause a
       reallocation (and copy) for each of them. */
    while ( newsize < needed )
    { if ( newsize > (size_t)-1 / 2 )
      { newsize = needed;
	break;
      }
      newsize *= 2;
    }

    /* realloc(NULL, n) behaves as malloc(n).  Keep the old pointer
       until we know the call succeeded, so nothing leaks on failure. */
    newbuf = realloc(dbuf->buf, newsize);
    if ( !newbuf )
    { pl_error("mime_parse", 3, NULL, ERR_ERRNO, errno, "add_data", "mime", 0);
      return -1;
    }

    dbuf->buf = newbuf;
    dbuf->allocated = newsize;
  }

  memcpy(dbuf->buf + dbuf->size, ndata, len);
  dbuf->size = needed;

  return 0;
}
/* mime_unify_data() decodes the body of the MIME part `rfc' and unifies
   `data' with the decoded bytes as an atom.

   `buffer' is the start of the complete message text; the body location
   inside it is obtained from rfc2045_mimepos().  Decoded output is
   collected through add_data() into a temporary struct dbuf, which is
   always released before returning.

   Returns the result of the unification, or FALSE if decoding failed.
*/

static int
mime_unify_data(term_t data, struct rfc2045 *rfc, const char *buffer)
{ off_t start_pos, end_pos, start_body, nlines, nbodylines;
  struct dbuf dbuf;
  int rval = FALSE;

  dbuf.buf = NULL;
  dbuf.size = 0;
  dbuf.allocated = 0;

  rfc2045_mimepos(rfc,
		  &start_pos, &end_pos, &start_body, &nlines, &nbodylines);
  rfc2045_cdecode_start(rfc, add_data, &dbuf);

  if ( rfc2045_cdecode(rfc, buffer+start_body, end_pos-start_body) == 0 &&
       rfc2045_cdecode_end(rfc) == 0 )
  { /* If the decoder never called add_data() the buffer is still NULL;
       hand out a valid (empty) string rather than a NULL pointer. */
    rval = PL_unify_atom_nchars(data, dbuf.size, dbuf.buf ? dbuf.buf : "");
  }

  free(dbuf.buf);			/* free(NULL) is a no-op */

  return rval;
}
/* add_attribute() appends the term functor(value) to the partial list
   `list' and advances `list' to the new tail.  A missing value (NULL or
   the empty string) adds nothing and counts as success.

   Returns TRUE on success and FALSE if one of the unifications fails.
*/

static int
add_attribute(term_t list, const char *value, functor_t functor)
{ term_t head;
  int ok;

  if ( value == NULL || value[0] == '\0' )
    return TRUE;			/* nothing to add */

  head = PL_new_term_ref();

  ok = PL_unify_list(list, head, list);
  if ( ok )
    ok = (PL_unify_term(head, PL_FUNCTOR, functor, PL_CHARS, value) != 0);

  PL_reset_term_refs(head);		/* head is no longer needed */

  return ok;
}
/* mime_unify() unifies `result' with mime(Attributes, Data, SubMimeList)
   describing the parsed part `rfc'.  `buffer' is the start of the message
   text the part was parsed from.

   - A dummy part gets [] for both Attributes and Data.
   - Any other part gets an attribute list holding a name(Value) term for
     every property that has a non-empty value.
   - A part with sub-parts gets '' as Data and one recursively built
     mime/3 term per non-dummy sub-part; a part without sub-parts gets
     [] as SubMimeList and its decoded body as Data.

   Returns TRUE on success, FALSE as soon as a unification fails.
*/

static int
mime_unify(term_t result, struct rfc2045 *rfc, const char *buffer)
{ term_t data = PL_new_term_ref();
  term_t subs = PL_new_term_ref();
  term_t atts = PL_new_term_ref();
  term_t tail, head;
  struct rfc2045 *part;

  if ( !PL_unify_term(result,
		      PL_FUNCTOR, FUNCTOR_mime3,
		        PL_TERM, atts,
		        PL_TERM, data,
		        PL_TERM, subs) )
    return FALSE;

					/* attributes */
  if ( rfc->isdummy )
  { if ( !(PL_unify_nil(data) && PL_unify_nil(atts)) )
      return FALSE;
  } else
  { term_t at = PL_copy_term_ref(atts);
    const char *type, *enc, *cset;
    const char *disp, *name, *fnam;
    const char *id   = rfc2045_content_id(rfc);
    const char *desc = rfc2045_content_description(rfc);
    const char *lang = rfc2045_content_language(rfc);
    const char *md5  = rfc2045_content_md5(rfc);

    rfc2045_mimeinfo(rfc, &type, &enc, &cset);
    rfc2045_dispositioninfo(rfc, &disp, &name, &fnam);

    /* && stops at the first failure, so the attributes are added in
       exactly this order and the list is closed last. */
    if ( !(add_attribute(at, type, FUNCTOR_type1) &&
	   add_attribute(at, enc,  FUNCTOR_transfer_encoding1) &&
	   add_attribute(at, cset, FUNCTOR_character_set1) &&
	   add_attribute(at, id,   FUNCTOR_id1) &&
	   add_attribute(at, desc, FUNCTOR_description1) &&
	   add_attribute(at, lang, FUNCTOR_language1) &&
	   add_attribute(at, disp, FUNCTOR_disposition1) &&
	   add_attribute(at, name, FUNCTOR_name1) &&
	   add_attribute(at, fnam, FUNCTOR_filename1) &&
	   add_attribute(at, md5,  FUNCTOR_md51) &&
	   PL_unify_nil(at)) )
      return FALSE;
  }

					/* leaf: decoded body, no parts */
  if ( !rfc->firstpart )
    return PL_unify_nil(subs) && mime_unify_data(data, rfc, buffer);

					/* multipart: '' + sub-parts */
  tail = PL_copy_term_ref(subs);
  head = PL_new_term_ref();

  if ( !PL_unify_atom(data, ATOM_) )
    return FALSE;

  part = rfc->firstpart;
  while ( part )
  { if ( !part->isdummy )
    { if ( !(PL_unify_list(tail, head, tail) &&
	     mime_unify(head, part, buffer)) )
	return FALSE;
    }
    part = part->next;
  }

  return PL_unify_nil(tail);
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
get_character_data()
Get a buffer of data from a specification. Currently the following
specs are acceptable:
stream(Stream) All data from this stream
stream(Stream, N) At most N characters from stream
Atom, String, CodeList Data from native Prolog character data
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* get_character_data() fetches the message text described by `from'.

   On success TRUE is returned, *data points to the text and *len holds
   its length.  *malloced is TRUE when *data was obtained from malloc()
   and must be released by the caller with free(); it is FALSE when the
   text was obtained through PL_get_nchars().  On failure the result of
   pl_error() is returned and no buffer is handed to the caller.

   Only stream/N terms are treated as stream specifications.  Any other
   term, including a code list (which is itself a compound term), is
   passed to PL_get_nchars().
*/

static int
get_character_data(term_t from, char **data, size_t *len, int *malloced)
{ atom_t name;
  int arity;
  size_t size;

  if ( PL_get_name_arity(from, &name, &arity) &&
       arity > 0 && name == ATOM_stream )
  { IOSTREAM *stream;
    term_t arg = PL_new_term_ref();
    char *buf;
    size_t done = 0;
    int c;

    _PL_get_arg(1, from, arg);
    if ( !PL_get_stream_handle(arg, &stream) )
      return pl_error(NULL, 0, NULL, ERR_ARGTYPE, 1, from, "stream");
    /* NOTE(review): the handle obtained above is never released in this
       function -- confirm whether a matching PL_release_stream() call
       is required. */
    /* NOTE(review): Sgetcode() results are stored into a char below, so
       values outside the char range are truncated -- confirm whether
       byte-wise reading was intended. */

    if ( arity == 1 )			/* stream(Stream) */
    { size_t allocated = 1024;

      if ( !(buf = malloc(allocated)) )
	return pl_error(NULL, 0, NULL, ERR_ERRNO, errno, "allocate", "memory", 0);

      while ( (c=Sgetcode(stream)) != EOF )
      { if ( done >= allocated )
	{ char *newbuf = realloc(buf, allocated*2);

	  if ( !newbuf )		/* old block is still ours: free it */
	  { int eno = errno;

	    free(buf);
	    return pl_error(NULL, 0, NULL, ERR_ERRNO, eno, "allocate", "memory", 0);
	  }
	  buf = newbuf;
	  allocated *= 2;
	}
	buf[done++] = (char)c;
      }

      *len = done;
      *data = buf;
      *malloced = TRUE;
      return TRUE;
    }

    if ( arity == 2 )			/* stream(Stream, Length) */
    { long limit;

      _PL_get_arg(2, from, arg);
      if ( !PL_get_long(arg, &limit) || limit < 0 )
	return pl_error(NULL, 0, NULL, ERR_ARGTYPE, 1, arg, "natural");

      /* malloc(0) may legitimately return NULL; always ask for >= 1 */
      if ( !(buf = malloc(limit > 0 ? (size_t)limit : 1)) )
	return pl_error(NULL, 0, NULL, ERR_ERRNO, errno, "allocate", "memory", 0);

      /* Test the limit before reading, so that no character beyond the
	 requested Length is consumed from the stream. */
      while ( done < (size_t)limit && (c=Sgetcode(stream)) != EOF )
	buf[done++] = (char)c;

      *len = done;
      *data = buf;
      *malloced = TRUE;
      return TRUE;
    }
					/* stream/N, N > 2: type error below */
  } else if ( PL_get_nchars(from, &size, data, CVT_ATOM|CVT_STRING|CVT_LIST) )
  { *len = size;
    *malloced = FALSE;
    return TRUE;
  }

  return pl_error(NULL, 0, NULL, ERR_ARGTYPE, 1, from, "data");
}
/* mime_parse(+Data, -Parsed) is the foreign predicate registered by
   install_mime().  It obtains the message text from `handle' through
   get_character_data(), parses it with the rfc2045 library and unifies
   `result' with the resulting mime/3 term.

   The parser object is always released, and the text buffer is freed
   when get_character_data() reported it as malloc()ed.
*/

foreign_t
mime_parse(term_t handle, term_t result)
{ char *text;
  size_t length = 0;
  int must_free = FALSE;
  struct rfc2045 *msg;
  int ok;

  if ( !get_character_data(handle, &text, &length, &must_free) )
    return FALSE;

  msg = rfc2045_alloc();
  rfc2045_parse(msg, text, length);
  ok = mime_unify(result, msg, text);

  if ( must_free )
    free(text);
  rfc2045_free(msg);

  return ok;
}
/*******************************
* ERRORS *
*******************************/
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Not typically elegant, but the documentation wishes us to call exit(),
which is even worse.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* rfc2045_error() is the error hook of the rfc2045 library.  It raises
   the Prolog exception error(mime(Message), _), where Message is
   `errmsg'.  If the exception term cannot be built, the process is
   stopped through PL_fatal_error().

   PL_throw() is expected not to return; the fatal error below is only
   reached when the term reference or the unification failed.
*/

void
rfc2045_error(const char *errmsg)
{ term_t e = PL_new_term_ref();		/* one reference is all we need */

  if ( e &&
       PL_unify_term(e,
		     PL_FUNCTOR_CHARS, "error", 2,
		       PL_FUNCTOR_CHARS, "mime", 1,
		         PL_CHARS, errmsg,
		       PL_VARIABLE) )
    PL_throw(e);

  PL_fatal_error("Could not recover from rfc2045 error");
}
/*******************************
* INSTALL *
*******************************/
#define mkfunctor(n, a) PL_new_functor(PL_new_atom(n), a)
install_t
install_mime()
{ ATOM_ = PL_new_atom("");
ATOM_stream = PL_new_atom("stream");
FUNCTOR_type1 = mkfunctor("type", 1);
FUNCTOR_transfer_encoding1 = mkfunctor("transfer_encoding", 1);
FUNCTOR_character_set1 = mkfunctor("character_set", 1);
FUNCTOR_mime3 = mkfunctor("mime", 3);
FUNCTOR_id1 = mkfunctor("id", 1);
FUNCTOR_description1 = mkfunctor("description", 1);
FUNCTOR_language1 = mkfunctor("language", 1);
FUNCTOR_md51 = mkfunctor("md5", 1);
FUNCTOR_disposition1 = mkfunctor("disposition", 1);
FUNCTOR_name1 = mkfunctor("name", 1);
FUNCTOR_filename1 = mkfunctor("filename", 1);
PL_register_foreign("mime_parse", 2, mime_parse, 0);
}