1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.archive.crawler.datamodel.credential;
24
25 import java.util.HashMap;
26 import java.util.Iterator;
27 import java.util.Map;
28 import java.util.logging.Logger;
29
30 import javax.management.Attribute;
31 import javax.management.AttributeNotFoundException;
32
33 import org.apache.commons.httpclient.HttpClient;
34 import org.apache.commons.httpclient.HttpMethod;
35 import org.apache.commons.httpclient.HttpMethodBase;
36 import org.apache.commons.httpclient.NameValuePair;
37 import org.apache.commons.httpclient.URIException;
38 import org.apache.commons.httpclient.methods.GetMethod;
39 import org.apache.commons.httpclient.methods.PostMethod;
40 import org.apache.commons.lang.StringUtils;
41 import org.archive.crawler.datamodel.CrawlURI;
42 import org.archive.crawler.settings.MapType;
43 import org.archive.crawler.settings.SimpleType;
44 import org.archive.crawler.settings.Type;
45 import org.archive.net.UURI;
46 import org.archive.net.UURIFactory;
47
48
49
50 /***
51 * Credential that holds all needed to do a GET/POST to a HTML form.
52 *
53 * @author stack
54 * @version $Revision: 5913 $, $Date: 2008-07-28 22:34:52 +0000 (Mon, 28 Jul 2008) $
55 */
56 public class HtmlFormCredential extends Credential {
57
58 private static final long serialVersionUID = -4732570804435453949L;
59
60 private static final Logger logger =
61 Logger.getLogger(HtmlFormCredential.class.getName());
62
63 private static final String ATTR_LOGIN_URI = "login-uri";
64 private static final String ATTR_FORM_ITEMS = "form-items";
65 private static final String ATTR_FORM_METHOD = "http-method";
66 private static final String [] METHODS = {"POST", "GET"};
67
68 /***
69 * Constructor.
70 *
71 * A constructor that takes name of the credential is required by settings
72 * framework.
73 *
74 * @param name Name of this credential.
75 */
76 public HtmlFormCredential(final String name)
77 {
78 super(name, "Credential that has all necessary" +
79 " for running a POST/GET to an HTML login form.");
80
81 Type t = addElementToDefinition(new SimpleType("login-uri",
82 "Full URI of page that contains the HTML login form we're to" +
83 " apply these credentials too: E.g. http://www.archive.org", ""));
84 t.setOverrideable(false);
85 t.setExpertSetting(true);
86
87
88 t = addElementToDefinition(new SimpleType(ATTR_FORM_METHOD,
89 "GET or POST", METHODS[0], METHODS));
90 t.setOverrideable(false);
91 t.setExpertSetting(true);
92
93 t = addElementToDefinition(new MapType(ATTR_FORM_ITEMS, "Form items.",
94 String.class));
95 t.setOverrideable(false);
96 t.setExpertSetting(true);
97 }
98
99 /***
100 * @param context CrawlURI context to use.
101 * @return login-uri.
102 * @throws AttributeNotFoundException
103 */
104 public String getLoginUri(final CrawlURI context)
105 throws AttributeNotFoundException {
106 return (String)getAttribute(ATTR_LOGIN_URI, context);
107 }
108
109 /***
110 * @param context CrawlURI context to use.
111 * @return login-uri.
112 * @throws AttributeNotFoundException
113 */
114 public String getHttpMethod(final CrawlURI context)
115 throws AttributeNotFoundException {
116 return (String)getAttribute(ATTR_FORM_METHOD, context);
117 }
118
119 /***
120 * @param context CrawlURI context to use.
121 * @return Form inputs as convenient map. Returns null if no form items.
122 * @throws AttributeNotFoundException
123 */
124 public Map<String,Object> getFormItems(final CrawlURI context)
125 throws AttributeNotFoundException {
126 Map<String,Object> result = null;
127 MapType items = (MapType)getAttribute(ATTR_FORM_ITEMS, context);
128 if (items != null) {
129 for (Iterator i = items.iterator(context); i.hasNext();) {
130 Attribute a = (Attribute)i.next();
131 if (result == null) {
132 result = new HashMap<String,Object>();
133 }
134 result.put(a.getName(), a.getValue());
135 }
136 }
137 return result;
138 }
139
140 public boolean isPrerequisite(final CrawlURI curi) {
141 boolean result = false;
142 String curiStr = curi.getUURI().toString();
143 String loginUri = getPrerequisite(curi);
144 if (loginUri != null) {
145 try {
146 UURI uuri = UURIFactory.getInstance(curi.getUURI(), loginUri);
147 if (uuri != null && curiStr != null &&
148 uuri.toString().equals(curiStr)) {
149 result = true;
150 if (!curi.isPrerequisite()) {
151 curi.setPrerequisite(true);
152 logger.fine(curi + " is prereq.");
153 }
154 }
155 } catch (URIException e) {
156 logger.severe("Failed to uuri: " + curi + ", " +
157 e.getMessage());
158 }
159 }
160 return result;
161 }
162
163 public boolean hasPrerequisite(CrawlURI curi) {
164 return getPrerequisite(curi) != null;
165 }
166
167 public String getPrerequisite(CrawlURI curi) {
168 String loginUri = null;
169 try {
170 loginUri = getLoginUri(curi);
171 } catch (AttributeNotFoundException e) {
172 logger.severe("Failed to getLoginUri: " + this + ", " + curi + ","
173 + e.getMessage());
174
175
176
177 }
178 return loginUri;
179 }
180
181 public String getKey(CrawlURI curi) throws AttributeNotFoundException {
182 return getLoginUri(curi);
183 }
184
185 public boolean isEveryTime() {
186
187 return false;
188 }
189
190 public boolean populate(CrawlURI curi, HttpClient http, HttpMethod method,
191 String payload) {
192
193
194 boolean result = false;
195 Map formItems = null;
196 try {
197 formItems = getFormItems(curi);
198 }
199 catch (AttributeNotFoundException e1) {
200 logger.severe("Failed get of form items for " + curi);
201 }
202 if (formItems == null || formItems.size() <= 0) {
203 try {
204 logger.severe("No form items for " + method.getURI());
205 }
206 catch (URIException e) {
207 logger.severe("No form items and exception getting uri: " +
208 e.getMessage());
209 }
210 return result;
211 }
212
213 NameValuePair[] data = new NameValuePair[formItems.size()];
214 int index = 0;
215 String key = null;
216 for (Iterator i = formItems.keySet().iterator(); i.hasNext();) {
217 key = (String)i.next();
218 data[index++] = new NameValuePair(key, (String)formItems.get(key));
219 }
220 if (method instanceof PostMethod) {
221 ((PostMethod)method).setRequestBody(data);
222 result = true;
223 } else if (method instanceof GetMethod) {
224
225
226
227 HttpMethodBase hmb = (HttpMethodBase)method;
228 String currentQuery = hmb.getQueryString();
229 hmb.setQueryString(data);
230 String newQuery = hmb.getQueryString();
231 hmb.setQueryString(
232 ((StringUtils.isNotEmpty(currentQuery))
233 ? currentQuery + "&"
234 : "")
235 + newQuery);
236 result = true;
237 } else {
238 logger.severe("Unknown method type: " + method);
239 }
240 return result;
241 }
242
243 public boolean isPost(CrawlURI curi) {
244 String method = null;
245 try {
246 method = getHttpMethod(curi);
247 }
248 catch (AttributeNotFoundException e) {
249 logger.severe("Failed to get method for " + curi + ", " + this);
250 }
251 return method != null && method.equalsIgnoreCase("POST");
252 }
253 }