Improve display of MARC records with multi-byte subfield IDs YAZ-695
[yaz-moved-to-github.git] / src / url.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2013 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file url.c
7  * \brief URL fetch utility
8  */
9 #if HAVE_CONFIG_H
10 #include <config.h>
11 #endif
12
13 #include <yaz/url.h>
14 #include <yaz/comstack.h>
15 #include <yaz/log.h>
16 #include <yaz/wrbuf.h>
17
18 struct yaz_url {
19     ODR odr_in;
20     ODR odr_out;
21     char *proxy;
22     int max_redirects;
23     WRBUF w_error;
24 };
25
26 yaz_url_t yaz_url_create(void)
27 {
28     yaz_url_t p = xmalloc(sizeof(*p));
29     p->odr_in = odr_createmem(ODR_DECODE);
30     p->odr_out = odr_createmem(ODR_ENCODE);
31     p->proxy = 0;
32     p->max_redirects = 10;
33     p->w_error = wrbuf_alloc();
34     return p;
35 }
36
37 void yaz_url_destroy(yaz_url_t p)
38 {
39     if (p)
40     {
41         odr_destroy(p->odr_in);
42         odr_destroy(p->odr_out);
43         xfree(p->proxy);
44         wrbuf_destroy(p->w_error);
45         xfree(p);
46     }
47 }
48
49 void yaz_url_set_proxy(yaz_url_t p, const char *proxy)
50 {
51     xfree(p->proxy);
52     p->proxy = 0;
53     if (proxy && *proxy)
54         p->proxy = xstrdup(proxy);
55 }
56
57 void yaz_url_set_max_redirects(yaz_url_t p, int num)
58 {
59     p->max_redirects = num;
60 }
61
62 static void extract_user_pass(NMEM nmem,
63                               const char *uri,
64                               char **uri_lean, char **http_user,
65                               char **http_pass)
66 {
67     const char *cp1 = strchr(uri, '/');
68     *uri_lean = 0;
69     *http_user = 0;
70     *http_pass = 0;
71     if (cp1 && cp1 > uri)
72     {
73         cp1--;
74
75         if (!strncmp(cp1, "://", 3))
76         {
77             const char *cp3 = 0;
78             const char *cp2 = cp1 + 3;
79             while (*cp2 && *cp2 != '/' && *cp2 != '@')
80             {
81                 if (*cp2 == ':')
82                     cp3 = cp2;
83                 cp2++;
84             }
85             if (*cp2 == '@' && cp3)
86             {
87                 *uri_lean = nmem_malloc(nmem, strlen(uri) + 1);
88                 memcpy(*uri_lean, uri, cp1 + 3 - uri);
89                 strcpy(*uri_lean + (cp1 + 3 - uri), cp2 + 1);
90
91                 *http_user = nmem_strdupn(nmem, cp1 + 3, cp3 - (cp1 + 3));
92                 *http_pass = nmem_strdupn(nmem, cp3 + 1, cp2 - (cp3 + 1));
93             }
94         }
95     }
96     if (*uri_lean == 0)
97         *uri_lean = nmem_strdup(nmem, uri);
98 }
99
100 const char *yaz_url_get_error(yaz_url_t p)
101 {
102     return wrbuf_cstr(p->w_error);
103 }
104
105 static void log_warn(yaz_url_t p)
106 {
107     yaz_log(YLOG_WARN, "yaz_url: %s", wrbuf_cstr(p->w_error));
108 }
109
110 Z_HTTP_Response *yaz_url_exec(yaz_url_t p, const char *uri,
111                               const char *method,
112                               Z_HTTP_Header *user_headers,
113                               const char *buf, size_t len)
114 {
115     Z_HTTP_Response *res = 0;
116     int number_of_redirects = 0;
117
118     wrbuf_rewind(p->w_error);
119     while (1)
120     {
121         void *add;
122         COMSTACK conn = 0;
123         int code;
124         const char *location = 0;
125         char *http_user = 0;
126         char *http_pass = 0;
127         char *uri_lean = 0;
128         Z_GDU *gdu;
129
130         extract_user_pass(p->odr_out->mem, uri, &uri_lean,
131                           &http_user, &http_pass);
132
133         gdu = z_get_HTTP_Request_uri(p->odr_out, uri_lean, 0, p->proxy ? 1 : 0);
134         gdu->u.HTTP_Request->method = odr_strdup(p->odr_out, method);
135
136         for ( ; user_headers; user_headers = user_headers->next)
137         {
138             /* prefer new Host over user-supplied Host */
139             if (!strcmp(user_headers->name, "Host"))
140                 ;
141             /* prefer user-supplied User-Agent over YAZ' own */
142             else if (!strcmp(user_headers->name, "User-Agent"))
143                 z_HTTP_header_set(p->odr_out, &gdu->u.HTTP_Request->headers,
144                                   user_headers->name, user_headers->value);
145             else
146                 z_HTTP_header_add(p->odr_out, &gdu->u.HTTP_Request->headers,
147                                   user_headers->name, user_headers->value);
148         }
149         if (http_user && http_pass)
150             z_HTTP_header_add_basic_auth(p->odr_out,
151                                          &gdu->u.HTTP_Request->headers,
152                                          http_user, http_pass);
153
154         res = 0;
155         if (buf && len)
156         {
157             gdu->u.HTTP_Request->content_buf = (char *) buf;
158             gdu->u.HTTP_Request->content_len = len;
159         }
160         if (!z_GDU(p->odr_out, &gdu, 0, 0))
161         {
162             wrbuf_printf(p->w_error, "Can not encode HTTP request for URL %s",
163                          uri);
164             log_warn(p);
165             return 0;
166         }
167         conn = cs_create_host_proxy(uri_lean, 1, &add, p->proxy);
168         if (!conn)
169         {
170             wrbuf_printf(p->w_error, "Can not resolve URL %s", uri);
171             log_warn(p);
172         }
173         else if (cs_connect(conn, add) < 0)
174         {
175             wrbuf_printf(p->w_error, "Can not connect to URL %s", uri);
176             log_warn(p);
177         }
178         else
179         {
180             int len;
181             char *buf = odr_getbuf(p->odr_out, &len, 0);
182
183             if (cs_put(conn, buf, len) < 0)
184             {
185                 wrbuf_printf(p->w_error, "cs_put fail for URL %s", uri);
186                 log_warn(p);
187             }
188             else
189             {
190                 char *netbuffer = 0;
191                 int netlen = 0;
192                 int cs_res = cs_get(conn, &netbuffer, &netlen);
193                 if (cs_res <= 0)
194                 {
195                     wrbuf_printf(p->w_error, "cs_get failed for URL %s", uri);
196                     log_warn(p);
197                 }
198                 else
199                 {
200                     Z_GDU *gdu;
201                     odr_setbuf(p->odr_in, netbuffer, cs_res, 0);
202                     if (!z_GDU(p->odr_in, &gdu, 0, 0)
203                         || gdu->which != Z_GDU_HTTP_Response)
204                     {
205                         wrbuf_printf(p->w_error, "HTTP decoding fail for "
206                                      "URL %s", uri);
207                         log_warn(p);
208                     }
209                     else
210                     {
211                         res = gdu->u.HTTP_Response;
212                     }
213                 }
214                 xfree(netbuffer);
215             }
216         }
217         if (conn)
218             cs_close(conn);
219         if (!res)
220             break;
221         code = res->code;
222         location = z_HTTP_header_lookup(res->headers, "Location");
223         if (++number_of_redirects <= p->max_redirects &&
224             location && (code == 301 || code == 302 || code == 307))
225         {
226             odr_reset(p->odr_out);
227             uri = odr_strdup(p->odr_out, location);
228             odr_reset(p->odr_in);
229         }
230         else
231             break;
232     }
233     return res;
234 }
235
236 /*
237  * Local variables:
238  * c-basic-offset: 4
239  * c-file-style: "Stroustrup"
240  * indent-tabs-mode: nil
241  * End:
242  * vim: shiftwidth=4 tabstop=8 expandtab
243  */
244