1240 lines
25 KiB
C
1240 lines
25 KiB
C
/*
|
|
** Copyright 1998 - 1999 Double Precision, Inc. See COPYING for
|
|
** distribution information.
|
|
*/
|
|
|
|
/*
|
|
** $Id$
|
|
*/
|
|
#if HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#if HAVE_STRINGS_H
|
|
#include <strings.h>
|
|
#endif
|
|
#include <ctype.h>
|
|
#include "rfc2045.h"
|
|
#include "rfc822/rfc822.h"
|
|
#include "rfc2045charset.h"
|
|
|
|
#ifdef __WINDOWS__
|
|
#define strncasecmp strnicmp
|
|
#endif
|
|
|
|
static char *rfc2045_defcharset=0;
|
|
|
|
extern void rfc2045_enomem();
|
|
|
|
#define MAXLEVELS 20
|
|
#define MAXPARTS 300
|
|
|
|
/*
|
|
New RFC2045 structure.
|
|
*/
|
|
|
|
struct rfc2045 *rfc2045_alloc()
|
|
{
|
|
struct rfc2045 *p=(struct rfc2045 *)malloc(sizeof(struct rfc2045));
|
|
|
|
if (!p)
|
|
{
|
|
rfc2045_enomem();
|
|
return (0);
|
|
}
|
|
|
|
/* Initialize everything to nulls, except for one thing */
|
|
|
|
memset(p, '\0', sizeof(*p));
|
|
|
|
p->pindex=1; /* Start with part #1 */
|
|
p->workinheader=1;
|
|
/* Most of the time, we're about to read a header */
|
|
|
|
return (p);
|
|
}
|
|
|
|
const char *rfc2045_getattr(const struct rfc2045attr *p, const char *name)
|
|
{
|
|
while (p)
|
|
{
|
|
if (p->name && strcmp(p->name, name) == 0)
|
|
return (p->value);
|
|
p=p->next;
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
void rfc2045_setattr(struct rfc2045attr **p, const char *name, const char *val)
|
|
{
|
|
char *v;
|
|
|
|
while (*p)
|
|
{
|
|
if (strcmp( (*p)->name, name) == 0) break;
|
|
p=&(*p)->next;
|
|
}
|
|
if (val == 0)
|
|
{
|
|
struct rfc2045attr *q= *p;
|
|
|
|
if (q)
|
|
{
|
|
*p=q->next;
|
|
if (q->name) free(q->name);
|
|
if (q->value) free(q->value);
|
|
free(q);
|
|
}
|
|
return;
|
|
}
|
|
|
|
v=strdup(val);
|
|
if (!v)
|
|
{
|
|
rfc2045_enomem();
|
|
return;
|
|
}
|
|
|
|
if (!*p)
|
|
{
|
|
if (((*p)=(struct rfc2045attr *)malloc(sizeof(**p))) == 0)
|
|
{
|
|
free(v);
|
|
rfc2045_enomem();
|
|
return;
|
|
}
|
|
memset( (*p), 0, sizeof(**p));
|
|
if ( ((*p)->name=strdup(name)) == 0)
|
|
{
|
|
free( *p );
|
|
*p=0;
|
|
free(v);
|
|
rfc2045_enomem();
|
|
return;
|
|
}
|
|
}
|
|
if ( (*p)->value ) free ( (*p)->value );
|
|
(*p)->value=v;
|
|
}
|
|
|
|
/* static const char cb_name[]="boundary"; */
|
|
|
|
/* #define ContentBoundary(p) (rfc2045_getattr( (p)->content_type_attr, cb_name)) */
|
|
|
|
#define ContentBoundary(p) ( (p)->boundary )
|
|
|
|
/*
|
|
Unallocate the RFC2045 structure. Recursively unallocate
|
|
all sub-structures. Unallocate all associated buffers.
|
|
*/
|
|
|
|
static void rfc2045_freeattr(struct rfc2045attr *p)
|
|
{
|
|
while (p)
|
|
{
|
|
struct rfc2045attr *q=p->next;
|
|
|
|
if (p->name) free(p->name);
|
|
if (p->value) free(p->value);
|
|
free(p);
|
|
p=q;
|
|
}
|
|
}
|
|
|
|
void rfc2045_free(struct rfc2045 *p)
|
|
{
|
|
struct rfc2045 *q, *r;
|
|
|
|
for (q=p->firstpart; q; )
|
|
{
|
|
r=q->next;
|
|
rfc2045_free(q);
|
|
q=r;
|
|
}
|
|
rfc2045_freeattr(p->content_type_attr);
|
|
rfc2045_freeattr(p->content_disposition_attr);
|
|
|
|
if (p->content_md5) free(p->content_md5);
|
|
if (p->content_base) free(p->content_base);
|
|
if (p->content_location) free(p->content_location);
|
|
if (p->content_language) free(p->content_language);
|
|
if (p->content_id) free(p->content_id);
|
|
if (p->content_description) free(p->content_description);
|
|
if (p->content_transfer_encoding) free(p->content_transfer_encoding);
|
|
if (p->boundary) free(p->boundary);
|
|
if (p->content_type) free(p->content_type);
|
|
if (p->mime_version) free(p->mime_version);
|
|
if (p->workbuf) free(p->workbuf);
|
|
if (p->content_disposition) free(p->content_disposition);
|
|
if (p->rw_transfer_encoding) free(p->rw_transfer_encoding);
|
|
free(p);
|
|
}
|
|
|
|
/*
|
|
Generic dynamic buffer append.
|
|
*/
|
|
|
|
void rfc2045_add_buf(
|
|
char **bufptr, /* Buffer */
|
|
size_t *bufsize, /* Buffer's maximum size */
|
|
size_t *buflen, /* Buffer's current size */
|
|
|
|
const char *p, size_t len) /* Append this data */
|
|
{
|
|
if (len + *buflen > *bufsize)
|
|
{
|
|
size_t newsize=len+*buflen+256;
|
|
char *p= *bufptr ? (char *)realloc(*bufptr, newsize):
|
|
(char *)malloc(newsize);
|
|
|
|
if (!p)
|
|
{
|
|
rfc2045_enomem();
|
|
return;
|
|
}
|
|
*bufptr=p;
|
|
*bufsize=newsize;
|
|
}
|
|
|
|
memcpy(*bufptr + *buflen, p, len);
|
|
*buflen += len;
|
|
}
|
|
|
|
/* Append to the work buffer */
|
|
|
|
void rfc2045_add_workbuf(struct rfc2045 *h, const char *p, size_t len)
|
|
{
|
|
rfc2045_add_buf( &h->workbuf, &h->workbufsize, &h->workbuflen, p, len);
|
|
}
|
|
|
|
/* Append one character to the work buffer */
|
|
|
|
void rfc2045_add_workbufch(struct rfc2045 *h, int c)
|
|
{
|
|
char cc= (char)c;
|
|
|
|
rfc2045_add_workbuf(h, &cc, 1);
|
|
}
|
|
|
|
/*
|
|
Generic function to duplicate contents of a string.
|
|
The destination string may already be previously allocated,
|
|
so unallocate it.
|
|
*/
|
|
|
|
static void set_string(char **p,
|
|
const char *q)
|
|
{
|
|
if (*p) free(*p);
|
|
|
|
*p=0;
|
|
if (!q) return;
|
|
|
|
if ((*p=(char *)malloc(strlen(q)+1)) == 0)
|
|
{
|
|
rfc2045_enomem();
|
|
return;
|
|
}
|
|
|
|
strcpy(*p, q);
|
|
}
|
|
|
|
/* Update byte counts for this structure, and all the superstructures */
|
|
|
|
static void update_counts(struct rfc2045 *p, size_t newcnt, size_t newendcnt,
|
|
unsigned nlines)
|
|
{
|
|
while (p)
|
|
{
|
|
p->endpos = newcnt;
|
|
p->endbody = newendcnt;
|
|
p->nlines += nlines;
|
|
if (!p->workinheader)
|
|
p->nbodylines += nlines;
|
|
p=p->parent;
|
|
}
|
|
}
|
|
|
|
/*
|
|
Main entry point for RFC2045 parsing. External data is fed
|
|
by repetitively calling rfc2045_parse().
|
|
|
|
rfc2045_parse() breaks up input into lines, and calls doline()
|
|
to process each line.
|
|
*/
|
|
|
|
static void doline(struct rfc2045 *);
|
|
|
|
void rfc2045_parse(struct rfc2045 *h, const char *buf, size_t s)
|
|
{
|
|
size_t l;
|
|
|
|
while (s)
|
|
{
|
|
for (l=0; l<s; l++)
|
|
if (buf[l] == '\n') break;
|
|
if (l < s && buf[l] == '\n')
|
|
{
|
|
++l;
|
|
rfc2045_add_workbuf(h, buf, l);
|
|
doline(h);
|
|
h->workbuflen=0;
|
|
}
|
|
else
|
|
rfc2045_add_workbuf(h, buf, l);
|
|
buf += l;
|
|
s -= l;
|
|
}
|
|
|
|
/*
|
|
** Our buffer's getting pretty big. Let's see if we can
|
|
** partially handle it.
|
|
*/
|
|
|
|
if (h->workbuflen > 512)
|
|
{
|
|
struct rfc2045 *p;
|
|
size_t l, i;
|
|
|
|
for (p=h; p->lastpart && !p->lastpart->workclosed;
|
|
p=p->lastpart)
|
|
;
|
|
|
|
/* If p->workinheader, we've got a mother of all headers
|
|
** here. Well, that's just too bad, we'll end up garbling
|
|
** it.
|
|
*/
|
|
|
|
l=h->workbuflen;
|
|
|
|
/* We do need to make sure that the final \r\n gets
|
|
** stripped off, so don't gobble up everything if
|
|
** the last character we see is a \r
|
|
*/
|
|
|
|
if (h->workbuf[l-1] == '\r')
|
|
--l;
|
|
|
|
/* If we'll be rewriting, make sure rwprep knows about
|
|
** stuff that was skipped just now. */
|
|
|
|
if (h->rfc2045acptr && !p->workinheader &&
|
|
(!p->lastpart || !p->lastpart->workclosed))
|
|
(*h->rfc2045acptr->section_contents)(h->workbuf, l);
|
|
|
|
update_counts(p, p->endpos+l, p->endpos+l, 0);
|
|
p->informdata=1;
|
|
for (i=0; l<h->workbuflen; l++)
|
|
h->workbuf[i++]=h->workbuf[l];
|
|
h->workbuflen=i;
|
|
}
|
|
}
|
|
|
|
/*
|
|
Append a new RFC2045 subpart. Adds new RFC2045 structure to the
|
|
end of the list of existing RFC2045 substructures.
|
|
*/
|
|
|
|
static struct rfc2045 *append_part_noinherit(struct rfc2045 *p, size_t startpos){
|
|
struct rfc2045 *newp;
|
|
|
|
newp=rfc2045_alloc();
|
|
if (p->lastpart)
|
|
{
|
|
p->lastpart->next=newp;
|
|
newp->pindex=p->lastpart->pindex+1;
|
|
}
|
|
else
|
|
{
|
|
p->firstpart=newp;
|
|
newp->pindex=0;
|
|
}
|
|
p->lastpart=newp;
|
|
newp->parent=p;
|
|
|
|
/* Initialize source pointers */
|
|
newp->startpos=newp->endpos=newp->startbody=newp->endbody=startpos;
|
|
|
|
while (p->parent)
|
|
p=p->parent;
|
|
++p->numparts;
|
|
|
|
return (newp);
|
|
}
|
|
|
|
static struct rfc2045 *append_part(struct rfc2045 *p, size_t startpos)
|
|
{
|
|
struct rfc2045 *newp=append_part_noinherit(p, startpos);
|
|
|
|
/* Substructures inherit content transfer encoding and character set */
|
|
|
|
set_string(&newp->content_transfer_encoding,
|
|
p->content_transfer_encoding);
|
|
rfc2045_setattr(&newp->content_type_attr, "charset",
|
|
rfc2045_getattr(p->content_type_attr, "charset"));
|
|
return (newp);
|
|
}
|
|
|
|
/*
|
|
doline() processes next line in the RFC2045 message.
|
|
|
|
Drills down the list of all the multipart messages currently open,
|
|
and checks if the line is a boundary line for the given multipart.
|
|
In theory the boundary line, if there is one, should be the boundary
|
|
line only for the inner multipart only, but, this takes into account
|
|
broken MIME messages.
|
|
*/
|
|
|
|
static void do_header(struct rfc2045 *);
|
|
|
|
static void doline(struct rfc2045 *p)
|
|
{
|
|
size_t cnt=p->workbuflen;
|
|
char *c=p->workbuf;
|
|
size_t n=cnt-1; /* Strip \n (we always get at least a \n here) */
|
|
struct rfc2045 *newp;
|
|
struct rfc2045ac *rwp=p->rfc2045acptr;
|
|
unsigned num_levels=0;
|
|
|
|
size_t k;
|
|
int bit8=0;
|
|
|
|
if (p->numparts > MAXPARTS)
|
|
{
|
|
p->rfcviolation |= RFC2045_ERR2COMPLEX;
|
|
return;
|
|
}
|
|
|
|
for (k=0; k<cnt; k++)
|
|
if (c[k] & 0x80) bit8=1;
|
|
|
|
if (n && c[n-1] == '\r') /* Strip trailing \r */
|
|
--n;
|
|
|
|
/* Before the main drill down loop before, look ahead and see if we're
|
|
** in a middle of a form-data section. */
|
|
|
|
for (newp=p; newp->lastpart &&
|
|
!newp->lastpart->workclosed; newp=newp->lastpart,
|
|
++num_levels)
|
|
{
|
|
if (ContentBoundary(newp) == 0 || newp->workinheader)
|
|
continue;
|
|
|
|
if (newp->lastpart->informdata)
|
|
{
|
|
p=newp->lastpart;
|
|
p->informdata=0;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Drill down until we match a boundary, or until we've reached
|
|
the last RFC2045 section that has been opened.
|
|
*/
|
|
|
|
while (p->lastpart)
|
|
{
|
|
size_t l;
|
|
const char *cb;
|
|
|
|
if (p->lastpart->workclosed)
|
|
{
|
|
update_counts(p, p->endpos+cnt, p->endpos+cnt, 1);
|
|
return;
|
|
}
|
|
/* Leftover trash -- workclosed is set when the final
|
|
** terminating boundary has been seen */
|
|
|
|
/* content_boundary may be set before the entire header
|
|
** has been seen, so continue drilling down in that case
|
|
*/
|
|
|
|
cb=ContentBoundary(p);
|
|
|
|
if (cb == 0 || p->workinheader)
|
|
{
|
|
p=p->lastpart;
|
|
++num_levels;
|
|
continue;
|
|
}
|
|
|
|
l=strlen(cb);
|
|
|
|
if (c[0] == '-' && c[1] == '-' && n >= 2+l &&
|
|
strncasecmp(cb, c+2, l) == 0)
|
|
{
|
|
|
|
if (rwp && (!p->lastpart || !p->lastpart->isdummy))
|
|
(*rwp->end_section)();
|
|
|
|
/* Ok, we've found a boundary */
|
|
|
|
if (n >= 4+l && strncmp(c+2+l, "--", 2) == 0)
|
|
{
|
|
/* Last boundary */
|
|
|
|
p->lastpart->workclosed=1;
|
|
update_counts(p, p->endpos+cnt, p->endpos+cnt,
|
|
1);
|
|
return;
|
|
}
|
|
|
|
/* Create new RFC2045 section */
|
|
|
|
newp=append_part(p, p->endpos+cnt);
|
|
update_counts(p, p->endpos+cnt, p->endpos+n, 1);
|
|
|
|
/* The new RFC2045 section is MIME compliant */
|
|
|
|
if ((newp->mime_version=strdup(p->mime_version)) == 0)
|
|
rfc2045_enomem();
|
|
return;
|
|
}
|
|
p=p->lastpart;
|
|
++num_levels;
|
|
}
|
|
|
|
/* Ok, we've found the RFC2045 section that we're working with.
|
|
** No what?
|
|
*/
|
|
|
|
if (! p->workinheader)
|
|
{
|
|
/* Processing body, just update the counts. */
|
|
|
|
size_t cnt_update=cnt;
|
|
|
|
if (bit8 && !p->content_8bit &&
|
|
(p->rfcviolation & RFC2045_ERR8BITCONTENT) == 0)
|
|
{
|
|
struct rfc2045 *q;
|
|
|
|
for (q=p; q; q=q->parent)
|
|
q->rfcviolation |= RFC2045_ERR8BITCONTENT;
|
|
}
|
|
|
|
/*
|
|
** In multiparts, the final newline in a part belongs to the
|
|
** boundary, otherwise, include it in the text.
|
|
*/
|
|
if (p->parent && p->parent->content_type &&
|
|
strncasecmp(p->parent->content_type,
|
|
"multipart/", 10) == 0)
|
|
cnt_update=n;
|
|
|
|
if (!p->lastpart || !p->lastpart->workclosed)
|
|
{
|
|
if (rwp && !p->isdummy)
|
|
(*rwp->section_contents)(c, cnt);
|
|
|
|
update_counts(p, p->endpos+cnt, p->endpos+cnt_update,
|
|
1);
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (bit8 && (p->rfcviolation & RFC2045_ERR8BITHEADER) == 0)
|
|
{
|
|
struct rfc2045 *q;
|
|
|
|
for (q=p; q; q=q->parent)
|
|
q->rfcviolation |= RFC2045_ERR8BITHEADER;
|
|
}
|
|
|
|
/* In the header */
|
|
|
|
if ( n == 0 ) /* End of header, body begins. Parse header. */
|
|
{
|
|
do_header(p); /* Clean up any left over header line */
|
|
p->workinheader=0;
|
|
|
|
/* Message body starts right here */
|
|
|
|
p->startbody=p->endpos+cnt;
|
|
update_counts(p, p->startbody, p->startbody, 1);
|
|
--p->nbodylines; /* Don't count the blank line */
|
|
|
|
/* Discard content type and boundary if I don't understand
|
|
** this MIME flavor.
|
|
*/
|
|
|
|
if (!RFC2045_ISMIME1(p->mime_version))
|
|
{
|
|
set_string(&p->content_type, 0);
|
|
|
|
rfc2045_freeattr(p->content_type_attr);
|
|
p->content_type_attr=0;
|
|
set_string(&p->content_disposition, 0);
|
|
rfc2045_freeattr(p->content_disposition_attr);
|
|
p->content_disposition_attr=0;
|
|
if (p->boundary)
|
|
{
|
|
free(p->boundary);
|
|
p->boundary=0;
|
|
}
|
|
}
|
|
|
|
/* Normally, if we don't have a content_type, default it
|
|
** to text/plain. However, if the multipart type is
|
|
** multipart/digest, it is message/rfc822.
|
|
*/
|
|
|
|
if (RFC2045_ISMIME1(p->mime_version) && !p->content_type)
|
|
{
|
|
char *q="text/plain";
|
|
|
|
if (p->parent && p->parent->content_type &&
|
|
strcmp(p->parent->content_type,
|
|
"multipart/digest") == 0)
|
|
q="message/rfc822";
|
|
set_string(&p->content_type, q);
|
|
}
|
|
|
|
/* If this is not a multipart section, we don't want to
|
|
** hear about any boundaries
|
|
*/
|
|
|
|
if (!p->content_type ||
|
|
strncmp(p->content_type, "multipart/", 10))
|
|
rfc2045_setattr(&p->content_type_attr, "boundary", 0);
|
|
|
|
/* If this section's a message, we will expect to see
|
|
** more RFC2045 stuff, so create a nested RFC2045 structure,
|
|
** and indicate that we expect to see headers.
|
|
*/
|
|
|
|
if (p->content_type &&
|
|
strcmp(p->content_type, "message/rfc822") == 0)
|
|
{
|
|
newp=append_part_noinherit(p, p->startbody);
|
|
newp->workinheader=1;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
** If this is a multipart message (boundary defined),
|
|
** create a RFC2045 structure for the pseudo-section
|
|
** that precedes the first boundary line.
|
|
*/
|
|
|
|
if (ContentBoundary(p))
|
|
{
|
|
newp=append_part(p, p->startbody);
|
|
newp->workinheader=0;
|
|
newp->isdummy=1;
|
|
/* It's easier just to create it. */
|
|
return;
|
|
}
|
|
|
|
if (rwp)
|
|
(*rwp->start_section)(p);
|
|
return;
|
|
}
|
|
|
|
/* RFC822 header continues */
|
|
|
|
update_counts(p, p->endpos + cnt, p->endpos+n, 1);
|
|
|
|
/* If this header line starts with a space, append one space
|
|
** to the saved contents of the previous line, and append this
|
|
** line to it.
|
|
*/
|
|
|
|
if (isspace((int)(unsigned char)*c))
|
|
{
|
|
rfc2045_add_buf(&p->header, &p->headersize, &p->headerlen, " ", 1);
|
|
}
|
|
else
|
|
{
|
|
/* Otherwise the previous header line is complete, so process it */
|
|
|
|
do_header(p);
|
|
p->headerlen=0;
|
|
}
|
|
|
|
/* Save this line in the header buffer, because the next line
|
|
** could be a continuation.
|
|
*/
|
|
|
|
rfc2045_add_buf( &p->header, &p->headersize, &p->headerlen, c, n);
|
|
}
|
|
|
|
/***********************************************************************/
|
|
|
|
/*
|
|
** paste_tokens() - recombine an array of RFC822 tokens back as a string.
|
|
** (Comments) are ignored.
|
|
*/
|
|
|
|
static char *paste_tokens(struct rfc822t *h, int start, int cnt)
|
|
{
|
|
int l;
|
|
int i;
|
|
char *p;
|
|
|
|
/* Calculate string size */
|
|
|
|
l=1;
|
|
for (i=0; i<cnt; i++)
|
|
{
|
|
if (h->tokens[start+i].token == '(')
|
|
continue;
|
|
|
|
if (rfc822_is_atom(h->tokens[start+i].token))
|
|
l += h->tokens[start+i].len;
|
|
else
|
|
l++;
|
|
}
|
|
|
|
/* Do it */
|
|
|
|
p=( char *)malloc(l);
|
|
if (!p)
|
|
{
|
|
rfc2045_enomem();
|
|
return (0);
|
|
}
|
|
l=0;
|
|
|
|
for (i=0; i<cnt; i++)
|
|
{
|
|
if (h->tokens[start+i].token == '(')
|
|
continue;
|
|
|
|
if (rfc822_is_atom(h->tokens[start+i].token))
|
|
{
|
|
int l2=h->tokens[start+i].len;
|
|
|
|
memcpy(p+l, h->tokens[start+i].ptr, l2);
|
|
l += l2;
|
|
}
|
|
else p[l++]=h->tokens[start+i].token;
|
|
}
|
|
p[l]=0;
|
|
return (p);
|
|
}
|
|
|
|
/* Various permutations of the above, including forcing the string to
|
|
** lowercase
|
|
*/
|
|
|
|
static char *lower_paste_tokens(struct rfc822t *h, int start, int cnt)
|
|
{
|
|
char *p=paste_tokens(h, start, cnt);
|
|
char *q;
|
|
|
|
for (q=p; q && *q; q++)
|
|
*q=tolower(*q);
|
|
return (p);
|
|
}
|
|
|
|
static char *paste_token(struct rfc822t *h, int i)
|
|
{
|
|
if (i >= h->ntokens) return (0);
|
|
return (paste_tokens(h, i, 1));
|
|
}
|
|
|
|
static char *lower_paste_token(struct rfc822t *h, int i)
|
|
{
|
|
char *p=paste_token(h, i);
|
|
char *q;
|
|
|
|
for (q=p; q && *q; q++)
|
|
*q=tolower(*q);
|
|
return (p);
|
|
}
|
|
|
|
/*
|
|
do_header() - process completed RFC822 header.
|
|
*/
|
|
|
|
static void mime_version(struct rfc2045 *, struct rfc822t *);
|
|
static void content_type(struct rfc2045 *, struct rfc822t *);
|
|
static void content_transfer_encoding(struct rfc2045 *, struct rfc822t *);
|
|
static void content_disposition(struct rfc2045 *, struct rfc822t *);
|
|
static void content_id(struct rfc2045 *, struct rfc822t *);
|
|
static void content_description(struct rfc2045 *, const char *);
|
|
static void content_language(struct rfc2045 *, const char *);
|
|
static void content_md5(struct rfc2045 *, const char *);
|
|
static void content_base(struct rfc2045 *, struct rfc822t *);
|
|
static void content_location(struct rfc2045 *, struct rfc822t *);
|
|
|
|
static void do_header(struct rfc2045 *p)
|
|
{
|
|
struct rfc822t *header;
|
|
char *t;
|
|
|
|
if (p->headerlen == 0) return;
|
|
rfc2045_add_buf( &p->header, &p->headersize, &p->headerlen, "", 1);
|
|
/* 0 terminate */
|
|
|
|
/* Parse the header line according to RFC822 */
|
|
|
|
header=rfc822t_alloc(p->header, NULL);
|
|
|
|
if (!header) return; /* Broken header */
|
|
|
|
if (header->ntokens < 2 ||
|
|
header->tokens[0].token ||
|
|
header->tokens[1].token != ':')
|
|
{
|
|
rfc822t_free(header);
|
|
return; /* Broken header */
|
|
}
|
|
|
|
t=lower_paste_token(header, 0);
|
|
|
|
if (t == 0)
|
|
;
|
|
else if (strcmp(t, "mime-version") == 0)
|
|
{
|
|
free(t);
|
|
mime_version(p, header);
|
|
}
|
|
else if (strcmp(t, "content-type") == 0)
|
|
{
|
|
free(t);
|
|
content_type(p, header);
|
|
} else if (strcmp(t, "content-transfer-encoding") == 0)
|
|
{
|
|
free(t);
|
|
content_transfer_encoding(p, header);
|
|
} else if (strcmp(t, "content-disposition") == 0)
|
|
{
|
|
free(t);
|
|
content_disposition(p, header);
|
|
} else if (strcmp(t, "content-id") == 0)
|
|
{
|
|
free(t);
|
|
content_id(p, header);
|
|
} else if (strcmp(t, "content-description") == 0)
|
|
{
|
|
free(t);
|
|
t=strchr(p->header, ':');
|
|
if (t) ++t;
|
|
while (t && isspace((int)(unsigned char)*t))
|
|
++t;
|
|
content_description(p, t);
|
|
} else if (strcmp(t, "content-language") == 0)
|
|
{
|
|
free(t);
|
|
t=strchr(p->header, ':');
|
|
if (t) ++t;
|
|
while (t && isspace((int)(unsigned char)*t))
|
|
++t;
|
|
content_language(p, t);
|
|
} else if (strcmp(t, "content-base") == 0)
|
|
{
|
|
free(t);
|
|
content_base(p, header);
|
|
} else if (strcmp(t, "content-location") == 0)
|
|
{
|
|
free(t);
|
|
content_location(p, header);
|
|
} else if (strcmp(t, "content-md5") == 0)
|
|
{
|
|
free(t);
|
|
t=strchr(p->header, ':');
|
|
if (t) ++t;
|
|
while (t && isspace((int)(unsigned char)*t))
|
|
++t;
|
|
content_md5(p, t);
|
|
}
|
|
else free(t);
|
|
rfc822t_free(header);
|
|
}
|
|
|
|
/* Mime-Version: and Content-Transfer-Encoding: headers are easy */
|
|
|
|
static void mime_version(struct rfc2045 *p, struct rfc822t *header)
|
|
{
|
|
char *vers=paste_tokens(header, 2, header->ntokens-2);
|
|
|
|
if (!vers) return;
|
|
|
|
if (p->mime_version) free(p->mime_version);
|
|
p->mime_version=vers;
|
|
}
|
|
|
|
static void content_transfer_encoding(struct rfc2045 *r,
|
|
struct rfc822t *header)
|
|
{
|
|
char *p;
|
|
|
|
p=lower_paste_tokens(header, 2, header->ntokens-2);
|
|
if (!p) return;
|
|
|
|
if (r->content_transfer_encoding)
|
|
free(r->content_transfer_encoding);
|
|
r->content_transfer_encoding=p;
|
|
|
|
if (strcmp(p, "8bit") == 0)
|
|
r->content_8bit=1;
|
|
}
|
|
|
|
/* Dig into the content_type header */
|
|
|
|
static void parse_content_header(struct rfc2045 *r, struct rfc822t *header,
|
|
void (*init_token)(struct rfc2045 *, char *),
|
|
void (*init_parameter)(struct rfc2045 *, const char *,
|
|
struct rfc822t *, int, int))
|
|
{
|
|
int start;
|
|
int i, j;
|
|
char *p;
|
|
|
|
/* Look for the 1st ; */
|
|
|
|
for (start=2; start < header->ntokens; start++)
|
|
if (header->tokens[start].token == ';')
|
|
break;
|
|
|
|
/* Everything up to the 1st ; is the content type */
|
|
|
|
p=lower_paste_tokens(header, 2, start-2);
|
|
if (!p) return;
|
|
|
|
(*init_token)(r, p);
|
|
if (start < header->ntokens) start++;
|
|
|
|
/* Handle the remainder of the Content-Type: header */
|
|
|
|
while (start < header->ntokens)
|
|
{
|
|
/* Look for next ; */
|
|
|
|
for (i=start; i<header->ntokens; i++)
|
|
if (header->tokens[i].token == ';')
|
|
break;
|
|
j=start;
|
|
if (j < i)
|
|
{
|
|
++j;
|
|
|
|
/* We only understand <atom>= */
|
|
|
|
while (j < i && header->tokens[j].token == '(')
|
|
++j;
|
|
if (j < i && header->tokens[j].token == '=')
|
|
{
|
|
++j;
|
|
p=lower_paste_token(header, start);
|
|
if (!p) return;
|
|
(*init_parameter)(r, p, header, j, i-j);
|
|
free(p);
|
|
}
|
|
}
|
|
if ( i<header->ntokens ) ++i; /* Skip over ; */
|
|
start=i;
|
|
}
|
|
}
|
|
|
|
/* Dig into the content_type header */
|
|
|
|
static void save_content_type(struct rfc2045 *, char *);
|
|
static void save_content_type_parameter( struct rfc2045 *, const char *,
|
|
struct rfc822t *, int, int);
|
|
|
|
static void content_type(struct rfc2045 *r, struct rfc822t *header)
|
|
{
|
|
parse_content_header(r, header, &save_content_type,
|
|
&save_content_type_parameter);
|
|
}
|
|
|
|
static void save_content_type(struct rfc2045 *r, char *content_type)
|
|
{
|
|
if (r->content_type) free(r->content_type);
|
|
r->content_type=content_type;
|
|
}
|
|
|
|
static void save_content_type_parameter(
|
|
struct rfc2045 *r, const char *name,
|
|
struct rfc822t *header, int start, int len)
|
|
{
|
|
char *p;
|
|
|
|
p=strcmp(name, "charset") == 0 ?
|
|
lower_paste_tokens(header, start, len):
|
|
paste_tokens(header, start, len);
|
|
if (!p) return;
|
|
|
|
rfc2045_setattr(&r->content_type_attr, name, p);
|
|
free(p);
|
|
|
|
if (strcmp(name, "boundary") == 0)
|
|
{
|
|
if (r->boundary)
|
|
free(r->boundary);
|
|
p=lower_paste_tokens(header, start, len);
|
|
r->boundary=p;
|
|
}
|
|
}
|
|
|
|
/* Dig into content-disposition */
|
|
|
|
static void save_content_disposition(struct rfc2045 *, char *);
|
|
static void save_content_disposition_parameter( struct rfc2045 *, const char *,
|
|
struct rfc822t *, int, int);
|
|
|
|
static void content_disposition(struct rfc2045 *r, struct rfc822t *header)
|
|
{
|
|
parse_content_header(r, header, &save_content_disposition,
|
|
&save_content_disposition_parameter);
|
|
}
|
|
|
|
static void save_content_disposition(struct rfc2045 *r,
|
|
char *content_disposition)
|
|
{
|
|
if (r->content_disposition) free(r->content_disposition);
|
|
r->content_disposition=content_disposition;
|
|
}
|
|
|
|
static void save_content_disposition_parameter(
|
|
struct rfc2045 *r, const char *name,
|
|
struct rfc822t *header, int start, int len)
|
|
{
|
|
char *p;
|
|
|
|
p=paste_tokens(header, start, len);
|
|
if (!p) return;
|
|
|
|
rfc2045_setattr(&r->content_disposition_attr, name, p);
|
|
free(p);
|
|
}
|
|
|
|
char *rfc2045_related_start(const struct rfc2045 *p)
|
|
{
|
|
const char *cb=rfc2045_getattr( p->content_type_attr, "start");
|
|
struct rfc822t *t;
|
|
struct rfc822a *a;
|
|
int i;
|
|
|
|
if (!cb || !*cb) return (0);
|
|
|
|
t=rfc822t_alloc(cb, 0);
|
|
if (!t)
|
|
{
|
|
rfc2045_enomem();
|
|
return(0);
|
|
}
|
|
|
|
a=rfc822a_alloc(t);
|
|
if (!a)
|
|
{
|
|
rfc822t_free(t);
|
|
rfc2045_enomem();
|
|
return (0);
|
|
}
|
|
for (i=0; i<a->naddrs; i++)
|
|
if (a->addrs[i].tokens)
|
|
{
|
|
char *s=rfc822_getaddr(a, i);
|
|
|
|
rfc822a_free(a);
|
|
rfc822t_free(t);
|
|
if (!s)
|
|
rfc2045_enomem();
|
|
return (s);
|
|
}
|
|
|
|
rfc822a_free(a);
|
|
rfc822t_free(t);
|
|
return (0);
|
|
}
|
|
|
|
static void content_id(struct rfc2045 *p, struct rfc822t *t)
|
|
{
|
|
struct rfc822a *a=rfc822a_alloc(t);
|
|
int i;
|
|
|
|
if (!a)
|
|
{
|
|
rfc2045_enomem();
|
|
return;
|
|
}
|
|
|
|
for (i=0; i<a->naddrs; i++)
|
|
if (a->addrs[i].tokens)
|
|
{
|
|
char *s=rfc822_getaddr(a, i);
|
|
|
|
if (!s)
|
|
{
|
|
rfc822a_free(a);
|
|
rfc2045_enomem();
|
|
return;
|
|
}
|
|
if (p->content_id)
|
|
free(p->content_id);
|
|
p->content_id=s;
|
|
break;
|
|
}
|
|
|
|
rfc822a_free(a);
|
|
}
|
|
|
|
static void content_description(struct rfc2045 *p, const char *s)
|
|
{
|
|
if (s && *s)
|
|
set_string(&p->content_description, s);
|
|
}
|
|
|
|
static void content_language(struct rfc2045 *p, const char *s)
|
|
{
|
|
if (s && *s)
|
|
set_string(&p->content_language, s);
|
|
}
|
|
|
|
static void content_md5(struct rfc2045 *p, const char *s)
|
|
{
|
|
if (s && *s)
|
|
set_string(&p->content_md5, s);
|
|
}
|
|
|
|
static void content_base(struct rfc2045 *p, struct rfc822t *t)
|
|
{
|
|
char *s;
|
|
int i;
|
|
|
|
for (i=0; i<t->ntokens; i++)
|
|
if (t->tokens[i].token == '"')
|
|
t->tokens[i].token=0;
|
|
|
|
s=paste_tokens(t, 2, t->ntokens-2);
|
|
set_string(&p->content_base, s);
|
|
}
|
|
|
|
static void content_location(struct rfc2045 *p, struct rfc822t *t)
|
|
{
|
|
char *s;
|
|
int i;
|
|
|
|
for (i=0; i<t->ntokens; i++)
|
|
if (t->tokens[i].token == '"')
|
|
t->tokens[i].token=0;
|
|
|
|
s=paste_tokens(t, 2, t->ntokens-2);
|
|
set_string(&p->content_location, s);
|
|
}
|
|
|
|
/* -------------------- */
|
|
|
|
#define GETINFO(s, def) ( (s) && (*s) ? (s):def)
|
|
|
|
void rfc2045_mimeinfo(const struct rfc2045 *p,
|
|
const char **content_type_s,
|
|
const char **content_transfer_encoding_s,
|
|
const char **charset_s)
|
|
{
|
|
const char *c;
|
|
|
|
*content_type_s=GETINFO(p->content_type, "text/plain");
|
|
*content_transfer_encoding_s=GETINFO(p->content_transfer_encoding,
|
|
"8bit");
|
|
|
|
c=rfc2045_getattr(p->content_type_attr, "charset");
|
|
if (!c) c=rfc2045_getdefaultcharset();
|
|
|
|
*charset_s=c;
|
|
}
|
|
|
|
const char *rfc2045_getdefaultcharset()
|
|
{
|
|
const char *p=rfc2045_defcharset;
|
|
|
|
if (!p) p=RFC2045CHARSET;
|
|
return (p);
|
|
}
|
|
|
|
void rfc2045_setdefaultcharset(const char *charset)
|
|
{
|
|
char *p=strdup(charset);
|
|
|
|
if (!p)
|
|
{
|
|
rfc2045_enomem();
|
|
return;
|
|
}
|
|
|
|
if (rfc2045_defcharset) free(rfc2045_defcharset);
|
|
rfc2045_defcharset=p;
|
|
}
|
|
|
|
const char *rfc2045_boundary(const struct rfc2045 *p)
|
|
{
|
|
const char *cb=rfc2045_getattr( p->content_type_attr, "boundary");
|
|
|
|
if (!cb) cb="";
|
|
return (cb);
|
|
}
|
|
|
|
void rfc2045_dispositioninfo(const struct rfc2045 *p,
|
|
const char **disposition_s,
|
|
const char **disposition_name_s,
|
|
const char **disposition_filename_s)
|
|
{
|
|
*disposition_s=p->content_disposition;
|
|
*disposition_name_s=rfc2045_getattr(p->content_disposition_attr,
|
|
"name");
|
|
*disposition_filename_s=rfc2045_getattr(p->content_disposition_attr,
|
|
"filename");
|
|
}
|
|
|
|
const char *rfc2045_contentname(const struct rfc2045 *p)
|
|
{
|
|
const char *q=rfc2045_getattr(p->content_type_attr, "name");
|
|
|
|
if (!q) q="";
|
|
return (q);
|
|
}
|
|
|
|
const char *rfc2045_content_id(const struct rfc2045 *p)
|
|
{
|
|
return (p->content_id ? p->content_id:"");
|
|
}
|
|
|
|
const char *rfc2045_content_description(const struct rfc2045 *p)
|
|
{
|
|
return (p->content_description ? p->content_description:"");
|
|
}
|
|
|
|
const char *rfc2045_content_language(const struct rfc2045 *p)
|
|
{
|
|
return (p->content_language ? p->content_language:"");
|
|
}
|
|
|
|
const char *rfc2045_content_md5(const struct rfc2045 *p)
|
|
{
|
|
return (p->content_md5 ? p->content_md5:"");
|
|
}
|
|
|
|
void rfc2045_mimepos(const struct rfc2045 *p,
|
|
off_t *start_pos, off_t *end_pos, off_t *start_body,
|
|
off_t *nlines, off_t *nbodylines)
|
|
{
|
|
*start_pos=p->startpos;
|
|
*end_pos=p->endpos;
|
|
|
|
*nlines=p->nlines;
|
|
*nbodylines=p->nbodylines;
|
|
if (p->parent) /* MIME parts do not have the trailing CRLF */
|
|
{
|
|
*end_pos=p->endbody;
|
|
if (*nlines) --*nlines;
|
|
if (*nbodylines) --*nbodylines;
|
|
}
|
|
*start_body=p->startbody;
|
|
}
|
|
|
|
unsigned rfc2045_mimepartcount(const struct rfc2045 *p)
|
|
{
|
|
const struct rfc2045 *q;
|
|
unsigned n=0;
|
|
|
|
for (q=p->firstpart; q; q=q->next) ++n;
|
|
return (n);
|
|
}
|