commit 0e9b337cb8
@@ -1,3 +1,10 @@
+2.0 (unreleased)
+----------------
+
+- Breaking change: inputs starting with ``"http://"`` or ``"https://"`` like
+  ``PyQuery("http://example.com")`` will no longer fetch the contents of the URL.
+  Users desiring the old behavior should switch to ``PyQuery(url="http://example.com")``.
+
 1.4.4 (unreleased)
 ------------------
 
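For readers applying this change, a minimal before/after sketch of the constructor call (``http://example.com`` is a placeholder, as in the note above)::

    from pyquery import PyQuery

    # Before 2.0, a positional http(s) string was auto-detected and fetched:
    #   doc = PyQuery("http://example.com")

    # From 2.0 on, fetching must be requested explicitly via the url keyword;
    # a positional URL string is no longer fetched.
    doc = PyQuery(url="http://example.com")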
@@ -7,17 +7,17 @@ Scraping
 
 PyQuery is able to load an html document from a url::
 
-    >>> pq(your_url)
+    >>> pq(url=your_url)
     [<html>]
 
 By default it uses python's urllib.
 
 If `requests`_ is installed then it will use it. This allows you to use most of `requests`_ parameters::
 
-    >>> pq(your_url, headers={'user-agent': 'pyquery'})
+    >>> pq(url=your_url, headers={'user-agent': 'pyquery'})
     [<html>]
 
-    >>> pq(your_url, {'q': 'foo'}, method='post', verify=True)
+    >>> pq(url=your_url, data={'q': 'foo'}, method='post', verify=True)
     [<html>]
 
 
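Mirroring the updated doc examples, a self-contained sketch of the keyword-only style (placeholder URL; the extra keywords are only honored when requests is installed)::

    from pyquery import PyQuery as pq

    # Fetch with an explicit url keyword; headers, method, data and verify
    # are forwarded to requests when it is available.
    d = pq(url="http://example.com", headers={'user-agent': 'pyquery'})
    print(d('title').text())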
@@ -150,14 +150,6 @@ class PyQuery(list):
         self._base_url = None
         self.parser = kwargs.pop('parser', None)
 
-        if (len(args) >= 1 and
-                isinstance(args[0], str) and
-                args[0].split('://', 1)[0] in ('http', 'https')):
-            kwargs['url'] = args[0]
-            if len(args) >= 2:
-                kwargs['data'] = args[1]
-            args = []
-
         if 'parent' in kwargs:
             self._parent = kwargs.pop('parent')
         else:
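The deleted block above is the positional-URL auto-detection. Callers who relied on it can reproduce the old behavior in a small wrapper of their own; a hypothetical sketch (``pq_compat`` is not part of pyquery)::

    from pyquery import PyQuery

    def pq_compat(arg, data=None, **kwargs):
        # Mimic the removed logic: an http(s) string becomes url=...,
        # and an optional second positional argument becomes data=...
        if isinstance(arg, str) and arg.split('://', 1)[0] in ('http', 'https'):
            kwargs['url'] = arg
            if data is not None:
                kwargs['data'] = data
            return PyQuery(**kwargs)
        return PyQuery(arg, **kwargs)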
@@ -902,14 +902,14 @@ class TestWebScrapping(TestCase):
         self.application_url = self.s.application_url.rstrip('/')
 
     def test_get(self):
-        d = pq(self.application_url, {'q': 'foo'},
+        d = pq(url=self.application_url, data={'q': 'foo'},
                method='get')
         print(d)
         self.assertIn('REQUEST_METHOD: GET', d('p').text())
         self.assertIn('q=foo', d('p').text())
 
     def test_post(self):
-        d = pq(self.application_url, {'q': 'foo'},
+        d = pq(url=self.application_url, data={'q': 'foo'},
                method='post')
         self.assertIn('REQUEST_METHOD: POST', d('p').text())
         self.assertIn('q=foo', d('p').text())
@@ -919,7 +919,7 @@ class TestWebScrapping(TestCase):
             import requests
             session = requests.Session()
             session.headers.update({'X-FOO': 'bar'})
-            d = pq(self.application_url, {'q': 'foo'},
+            d = pq(url=self.application_url, data={'q': 'foo'},
                    method='get', session=session)
             self.assertIn('HTTP_X_FOO: bar', d('p').text())
         else:
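The session test above shows that a preconfigured ``requests.Session`` is passed through to the fetch; a minimal sketch (placeholder URL, requires requests)::

    import requests
    from pyquery import PyQuery as pq

    session = requests.Session()
    session.headers.update({'X-FOO': 'bar'})  # session headers apply to the fetch
    d = pq(url="http://example.com", method='get', session=session)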
@@ -932,7 +932,7 @@ class TestWebScrapping(TestCase):
 class TestWebScrappingEncoding(TestCase):
 
     def test_get(self):
-        d = pq(u'http://ru.wikipedia.org/wiki/Заглавная_страница',
+        d = pq(url=u'http://ru.wikipedia.org/wiki/Заглавная_страница',
                method='get')
         print(d)
         self.assertEqual(d('#pt-login').text(), u'Войти')
@@ -950,9 +950,9 @@ class TestWebScrappingTimeouts(TestCase):
         self.application_url = self.s.application_url.rstrip('/')
 
     def test_get(self):
-        pq(self.application_url)
+        pq(url=self.application_url)
         with self.assertRaises(Exception):
-            pq(self.application_url, timeout=1)
+            pq(url=self.application_url, timeout=1)
 
     def tearDown(self):
         self.s.shutdown()
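As exercised by the timeout test, ``timeout`` bounds the fetch and the constructor raises when the server does not answer in time; a minimal sketch (placeholder URL)::

    from pyquery import PyQuery as pq

    try:
        d = pq(url="http://example.com", timeout=1)  # seconds
    except Exception as exc:
        print("fetch failed:", exc)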