samedi 6 décembre 2014

Premium Web of Knowledge API python access


Vote count:

0




My university bought the premium Web Services of Thomson Reuters (Web of Knowledge, a.k.a. Web of Science; I've uploaded the latest guide book on Google Drive) and I tried to retrieve some publication information. I found the Python script by domoritz, but it is written for Web Services Lite, so I modified it a little bit:



#!/usr/bin/env python
# -*- coding: utf-8 -*-

from suds.client import Client
from suds.transport.http import HttpTransport
import urllib2

class HTTPSudsPreprocessor(urllib2.BaseHandler):
    """urllib2 handler that adds the session ID cookie to outgoing requests."""

    def __init__(self, SID):
        # SID is concatenated into the cookie header below, so it must be a string.
        self.SID = SID

    def http_request(self, req):
        """Add the ``SID`` cookie header to *req* and return the same request."""
        req.add_header('cookie', 'SID="' + self.SID + '"')
        return req

    # HTTPS requests get exactly the same header treatment as HTTP ones.
    https_request = http_request


class WokmwsSoapClient():
    """
    Thin suds wrapper around the authentication and search SOAP services.

    main steps you have to do:
        soap = WokmwsSoapClient()
        results = soap.search(...)
    """

    def __init__(self):
        # Keep URLs and clients in two separate dicts. Binding both names to
        # one shared dict makes every stored client overwrite the URL that
        # lives under the same key.
        self.url = {}
        self.client = {}
        self.SID = ''

        self.url['auth'] = 'http://ift.tt/1tZgd5O'
        self.url['search'] = 'http://ift.tt/1qCUUrs'

        self.prepare()

    def __del__(self):
        self.close()

    def prepare(self):
        """does all the initialization we need for a request"""
        self.initAuthClient()
        self.authenticate()
        self.initSearchClient()

    def initAuthClient(self):
        """Create the suds client for the authentication service."""
        self.client['auth'] = Client(self.url['auth'])

    def initSearchClient(self):
        """Create the suds client for the search service.

        The client uses a transport whose opener sends the current ``SID``
        as a cookie, so ``authenticate()`` has to run first.
        """
        http = HttpTransport()
        opener = urllib2.build_opener(HTTPSudsPreprocessor(self.SID))
        http.urlopener = opener
        self.client['search'] = Client(self.url['search'], transport=http)

    def authenticate(self):
        """Call the ``authenticate`` operation and store the result as ``SID``."""
        self.SID = self.client['auth'].service.authenticate()

    def close(self):
        """Call ``closeSession`` on the auth service, if an auth client exists."""
        auth_client = self.client.get('auth')
        # __del__ also runs when __init__ failed before the auth client was
        # created; in that case there is nothing to close.
        if auth_client is not None:
            auth_client.service.closeSession()

    def search(self, query, chkpt):
        """Run the ``search`` operation and return the service response.

        query -- sent as ``userQuery``.
        chkpt -- sent as ``firstRecord``.
        """
        qparams = {
            'databaseId': 'WOS',
            'userQuery': query,
            'queryLanguage': 'en',
            'editions': [{
                'collection': 'WOS',
                'edition': 'SCI',
            }, {
                'collection': 'WOS',
                'edition': 'SSCI',
            }]
        }

        rparams = {
            'count': 100,  # 1-100
            'firstRecord': chkpt,
            'sortField': [{
                'name': 'RS',
                'sort': 'D',
            }]
        }

        return self.client['search'].service.search(qparams, rparams)

    def citedReferences(self, query, chkpt):
        """Run the ``citedReferences`` operation and return the service response.

        query -- sent as ``uid``.
        chkpt -- sent as ``firstRecord``.
        """
        # NOTE(review): this mirrors the two-argument shape used by search().
        # The citedReferences operation may instead expect databaseId, uid and
        # queryLanguage as separate arguments -- confirm against the WSDL.
        qparams = {
            'databaseId': 'WOS',
            'uid': query,
            'queryLanguage': 'en'
        }

        rparams = {
            'count': 100,  # 1-100
            'firstRecord': chkpt
        }

        return self.client['search'].service.citedReferences(qparams, rparams)


The search function for abstract info works fine, but citedReferences fails with suds.TypeNotFound: 'databaseId'. I'm sure databaseId is one of the parameters (page 12 in the guide).



<soapenv:Envelope xmlns:soapenv="http://ift.tt/sVJIaE"
xmlns:woksearch="http://ift.tt/1xxc4tr">
<soapenv:Header/>
<soapenv:Body>
<woksearch:citedReferences>
<databaseId>WOS</databaseId>
<uid>WOS:000270372400005</uid>
<queryLanguage>en</queryLanguage>
<retrieveParameters>
<firstRecord>1</firstRecord>
<count>100</count>
<option>
<key>Hot</key>
<value>On</value>
</option>
</retrieveParameters>
</woksearch:citedReferences>
</soapenv:Body>
</soapenv:Envelope>


The official example is a little different from the one for search (page 21):



<soapenv:Envelope xmlns:soapenv="http://ift.tt/sVJIaE"
xmlns:woksearch="http://ift.tt/1xxc4tr">
<soapenv:Header/>
<soapenv:Body>
<woksearch:search>
<queryParameters>
<databaseId>WOS</databaseId>
<userQuery>TS=(cadmium OR lead)</userQuery>
<editions>
<collection>WOS</collection>
<edition>SCI</edition>
</editions>
<timeSpan>
<begin>2000-01-01</begin>
<end>2011-12-31</end>
</timeSpan>
<queryLanguage>en</queryLanguage>
</queryParameters>
<retrieveParameters>
<firstRecord>1</firstRecord>
<count>5</count>
 <option>
<key>RecordIDs</key>
<value>On</value>
</option>
<option>
<key>targetNamespace</key>
<value>http://ift.tt/1q6EGJr</value>
</option>
</retrieveParameters>
</woksearch:search>
</soapenv:Body>
</soapenv:Envelope>


In citedReferences there is no queryParameters wrapper; databaseId, uid and queryLanguage are all sibling elements at the same level. I tried it like this:



def citedReferences(self, query, chkpt):
    """Second attempt: call ``citedReferences`` without a query-parameter dict.

    query -- used as the ``uid`` value.
    chkpt -- sent as ``firstRecord``.
    """
    # databaseId and queryLanguage are assigned here but never passed to the
    # service call below; only uid and rparams are sent.
    databaseId = 'WOS'
    uid = query
    queryLanguage = 'en'

    rparams = {
        'count': 100,  # 1-100
        'firstRecord': chkpt
    }

    return self.client['search'].service.citedReferences(uid, rparams)


It returned suds.TypeNotFound: 'count'. Is there any line I should add/modify to make the script work? Thanks!



asked 2 mins ago

leoce

108






Premium Web of Knowledge API python access

Aucun commentaire:

Enregistrer un commentaire