Require url inputs to be explicit

pull/222/head
Jack Cushman 1 year ago
parent 7096042c65
commit 2eebe5eee3
  1. 7
      CHANGES.rst
  2. 6
      docs/scrap.rst
  3. 8
      pyquery/pyquery.py
  4. 12
      tests/test_pyquery.py

@@ -1,3 +1,10 @@
2.0 (unreleased)
----------------
- Breaking change: inputs starting with ``"http://"`` or ``"https://"`` like
``PyQuery("http://example.com")`` will no longer fetch the contents of the URL.
Users desiring the old behavior should switch to ``PyQuery(url="http://example.com")``.
1.4.4 (unreleased)
------------------

@@ -7,17 +7,17 @@ Scraping
PyQuery is able to load an html document from a url::
>>> pq(your_url)
>>> pq(url=your_url)
[<html>]
By default it uses python's urllib.
If `requests`_ is installed then it will use it. This allows you to use most of `requests`_ parameters::
>>> pq(your_url, headers={'user-agent': 'pyquery'})
>>> pq(url=your_url, headers={'user-agent': 'pyquery'})
[<html>]
>>> pq(your_url, {'q': 'foo'}, method='post', verify=True)
>>> pq(url=your_url, data={'q': 'foo'}, method='post', verify=True)
[<html>]

@@ -149,14 +149,6 @@ class PyQuery(list):
self._base_url = None
self.parser = kwargs.pop('parser', None)
if (len(args) >= 1 and
isinstance(args[0], str) and
args[0].split('://', 1)[0] in ('http', 'https')):
kwargs['url'] = args[0]
if len(args) >= 2:
kwargs['data'] = args[1]
args = []
if 'parent' in kwargs:
self._parent = kwargs.pop('parent')
else:

@@ -894,14 +894,14 @@ class TestWebScrapping(TestCase):
self.application_url = self.s.application_url.rstrip('/')
def test_get(self):
d = pq(self.application_url, {'q': 'foo'},
d = pq(url=self.application_url, data={'q': 'foo'},
method='get')
print(d)
self.assertIn('REQUEST_METHOD: GET', d('p').text())
self.assertIn('q=foo', d('p').text())
def test_post(self):
d = pq(self.application_url, {'q': 'foo'},
d = pq(url=self.application_url, data={'q': 'foo'},
method='post')
self.assertIn('REQUEST_METHOD: POST', d('p').text())
self.assertIn('q=foo', d('p').text())
@@ -911,7 +911,7 @@ class TestWebScrapping(TestCase):
import requests
session = requests.Session()
session.headers.update({'X-FOO': 'bar'})
d = pq(self.application_url, {'q': 'foo'},
d = pq(url=self.application_url, data={'q': 'foo'},
method='get', session=session)
self.assertIn('HTTP_X_FOO: bar', d('p').text())
else:
@@ -924,7 +924,7 @@ class TestWebScrapping(TestCase):
class TestWebScrappingEncoding(TestCase):
def test_get(self):
d = pq(u'http://ru.wikipedia.org/wiki/Заглавная_страница',
d = pq(url=u'http://ru.wikipedia.org/wiki/Заглавная_страница',
method='get')
print(d)
self.assertEqual(d('#pt-login').text(), u'Войти')
@@ -942,9 +942,9 @@ class TestWebScrappingTimeouts(TestCase):
self.application_url = self.s.application_url.rstrip('/')
def test_get(self):
pq(self.application_url)
pq(url=self.application_url)
with self.assertRaises(Exception):
pq(self.application_url, timeout=1)
pq(url=self.application_url, timeout=1)
def tearDown(self):
self.s.shutdown()

Loading…
Cancel
Save