diff --git a/CHANGES.rst b/CHANGES.rst
index 40d508c..a0a7d59 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,3 +1,10 @@
+2.0 (unreleased)
+----------------
+
+- Breaking change: inputs starting with ``"http://"`` or ``"https://"``, like
+  ``PyQuery("http://example.com")``, will no longer fetch the contents of the URL.
+  Users who want the old behavior should switch to ``PyQuery(url="http://example.com")``.
+
 1.4.4 (unreleased)
 ------------------
 
diff --git a/docs/scrap.rst b/docs/scrap.rst
index 86b0b6d..6a469d6 100644
--- a/docs/scrap.rst
+++ b/docs/scrap.rst
@@ -7,17 +7,17 @@ Scraping
 
 PyQuery is able to load an html document from a url::
 
-    >>> pq(your_url)
+    >>> pq(url=your_url)
     [<html>]
 
 By default it uses python's urllib.
 If `requests`_ is installed then it will use it.
 This allows you to use most of `requests`_ parameters::
 
-    >>> pq(your_url, headers={'user-agent': 'pyquery'})
+    >>> pq(url=your_url, headers={'user-agent': 'pyquery'})
     [<html>]
 
-    >>> pq(your_url, {'q': 'foo'}, method='post', verify=True)
+    >>> pq(url=your_url, data={'q': 'foo'}, method='post', verify=True)
     [<html>]
 
 
diff --git a/pyquery/pyquery.py b/pyquery/pyquery.py
index 0d96445..13afbff 100644
--- a/pyquery/pyquery.py
+++ b/pyquery/pyquery.py
@@ -150,14 +150,6 @@ class PyQuery(list):
         self._base_url = None
         self.parser = kwargs.pop('parser', None)
 
-        if (len(args) >= 1 and
-                isinstance(args[0], str) and
-                args[0].split('://', 1)[0] in ('http', 'https')):
-            kwargs['url'] = args[0]
-            if len(args) >= 2:
-                kwargs['data'] = args[1]
-            args = []
-
         if 'parent' in kwargs:
             self._parent = kwargs.pop('parent')
         else:
diff --git a/tests/test_pyquery.py b/tests/test_pyquery.py
index ea0fc7b..1ab40e1 100644
--- a/tests/test_pyquery.py
+++ b/tests/test_pyquery.py
@@ -902,14 +902,14 @@ class TestWebScrapping(TestCase):
         self.application_url = self.s.application_url.rstrip('/')
 
     def test_get(self):
-        d = pq(self.application_url, {'q': 'foo'},
+        d = pq(url=self.application_url, data={'q': 'foo'},
                method='get')
         print(d)
         self.assertIn('REQUEST_METHOD: GET', d('p').text())
         self.assertIn('q=foo', d('p').text())
 
     def test_post(self):
-        d = pq(self.application_url, {'q': 'foo'},
+        d = pq(url=self.application_url, data={'q': 'foo'},
                method='post')
         self.assertIn('REQUEST_METHOD: POST', d('p').text())
         self.assertIn('q=foo', d('p').text())
@@ -919,7 +919,7 @@ class TestWebScrapping(TestCase):
             import requests
             session = requests.Session()
             session.headers.update({'X-FOO': 'bar'})
-            d = pq(self.application_url, {'q': 'foo'},
+            d = pq(url=self.application_url, data={'q': 'foo'},
                    method='get', session=session)
             self.assertIn('HTTP_X_FOO: bar', d('p').text())
         else:
@@ -932,7 +932,7 @@ class TestWebScrapping(TestCase):
 class TestWebScrappingEncoding(TestCase):
 
     def test_get(self):
-        d = pq(u'http://ru.wikipedia.org/wiki/Заглавная_страница',
+        d = pq(url=u'http://ru.wikipedia.org/wiki/Заглавная_страница',
                method='get')
         print(d)
         self.assertEqual(d('#pt-login').text(), u'Войти')
@@ -950,9 +950,9 @@ class TestWebScrappingTimeouts(TestCase):
         self.application_url = self.s.application_url.rstrip('/')
 
     def test_get(self):
-        pq(self.application_url)
+        pq(url=self.application_url)
         with self.assertRaises(Exception):
-            pq(self.application_url, timeout=1)
+            pq(url=self.application_url, timeout=1)
 
     def tearDown(self):
         self.s.shutdown()
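
A minimal sketch of the migration this diff implies. ``http://example.com`` is
only a placeholder, and the ``url``/``data``/``method`` keywords are the ones
exercised by the docs and tests above::

    from pyquery import PyQuery as pq

    # Before 2.0, a positional http(s) string was auto-detected and fetched:
    #     d = pq('http://example.com', {'q': 'foo'}, method='post')
    # From 2.0 on, the same request must name its arguments explicitly:
    d = pq(url='http://example.com', data={'q': 'foo'}, method='post')

    # A positional string is now always treated as markup, never fetched:
    d = pq('<a href="http://example.com">link</a>')

Positional string arguments are now reserved for markup, so a string that
merely looks like a URL can no longer trigger a network request.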