@manics, thanks for the initial start. I hadn't seen that part, but when you said it returns the token, I started playing around a little bit.
I have added `/auth/bigquery` into the scope now, and I've been reading a post on how to convert a token for use with `google.oauth2.credentials`, which I believe is needed for `bigquery.Client`:
```python
import google.oauth2.credentials

credentials = google.oauth2.credentials.Credentials(
    'access_token')
```
Taken from here
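One thing I'm not sure about is whether a bare access token is enough, since it can't refresh itself. What I had in mind is something like the sketch below (not tested; `user` is the dict returned by `get_authenticated_user()`, and whether it actually contains a `refresh_token` depends on how offline access/consent is configured):

```python
import google.oauth2.credentials

# Sketch only: build refreshable credentials from the OAuth token response.
# The refresh_token may be missing unless offline access was requested.
credentials = google.oauth2.credentials.Credentials(
    user['access_token'],
    refresh_token=user.get('refresh_token'),
    token_uri='https://oauth2.googleapis.com/token',
    client_id=self.client_id,
    client_secret=self.client_secret,
    scopes=['https://www.googleapis.com/auth/bigquery'],
)
```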
The problem I have (amongst many!) is that I get the pop-up for authentication, and BigQuery is now listed as a service that access is requested for, but when I call the `client.query`
command, the process times out due to a proxy error (or so it shows in the JupyterHub debug log):
`requests.exceptions.ProxyError: HTTPSConnectionPool(host='www.googleapis.com', port=443): Max retries exceeded with url: /bigquery/v2/projects/my-project-name/jobs (Caused by ProxyError('Cannot connect to proxy.', NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x7f64acda01d0>: Failed to establish a new connection: [Errno 110] Connection timed out',)))`
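I did wonder whether the hub container simply can't reach `www.googleapis.com`. As far as I know the BigQuery client goes through `requests`, which honours the standard proxy environment variables, so maybe the first thing to check is what those are set to inside the hub pod. Something like this is what I mean (the proxy address is just a placeholder for whatever my environment actually uses):

```python
import os

# Placeholder values -- substitute whatever proxy the hub really has access to.
os.environ['HTTPS_PROXY'] = 'http://my-proxy.internal:3128'
# Hosts that should bypass the proxy, if googleapis.com can be reached directly.
os.environ['NO_PROXY'] = 'localhost,127.0.0.1'
```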
My `google.py` code is below:
"""
Custom Authenticator to use Google OAuth with JupyterHub.
Derived from the GitHub OAuth authenticator.
"""
import os
import json
from tornado import gen
from tornado.auth import GoogleOAuth2Mixin
from tornado.web import HTTPError
from traitlets import Unicode, List, default, validate
from jupyterhub.auth import LocalAuthenticator
from jupyterhub.utils import url_path_join
from .oauth2 import OAuthLoginHandler, OAuthCallbackHandler, OAuthenticator
class GoogleLoginHandler(OAuthLoginHandler, GoogleOAuth2Mixin):
'''An OAuthLoginHandler that provides scope to GoogleOAuth2Mixin's
authorize_redirect.'''
@property
def scope(self):
return self.authenticator.scope
class GoogleOAuthHandler(OAuthCallbackHandler, GoogleOAuth2Mixin):
pass
class GoogleOAuthenticator(OAuthenticator, GoogleOAuth2Mixin):
login_handler = GoogleLoginHandler
callback_handler = GoogleOAuthHandler
@default('scope')
def _scope_default(self):
return ['https://www.googleapis.com/auth/bigquery','profile', 'openid', 'email']
#return ['profile','openid','email']
hosted_domain = List(
Unicode(),
config=True,
help="""List of domains used to restrict sign-in, e.g. mycollege.edu"""
)
@default('hosted_domain')
def _hosted_domain_from_env(self):
domains = []
for domain in os.environ.get('HOSTED_DOMAIN', '').split(';'):
if domain:
# check falsy to avoid trailing separators
# adding empty domains
domains.append(domain)
return domains
@validate('hosted_domain')
def _cast_hosted_domain(self, proposal):
"""handle backward-compatibility with hosted_domain is a single domain as a string"""
if isinstance(proposal.value, str):
# pre-0.9 hosted_domain was a string
# set it to a single item list
# (or if it's empty, an empty list)
if proposal.value == '':
return []
return [proposal.value]
return proposal.value
login_service = Unicode(
os.environ.get('LOGIN_SERVICE', 'Google'),
config=True,
help="""Google Apps hosted domain string, e.g. My College"""
)
async def authenticate(self, handler, data=None):
code = handler.get_argument("code")
handler.settings['google_oauth'] = {
'key': self.client_id,
'secret': self.client_secret,
'scope': self.scope,
}
user = await handler.get_authenticated_user(
redirect_uri=self.get_callback_url(handler),
code=code)
import google.oauth2.credentials
credentials = google.oauth2.credentials.Credentials(str(user['access_token']))
access_token = str(user['access_token'])
http_client = handler.get_auth_http_client()
response = await http_client.fetch(
self._OAUTH_USERINFO_URL + '?access_token=' + access_token
)
if not response:
handler.clear_all_cookies()
raise HTTPError(500, 'Google authentication failed')
#print(response)
from google.cloud import bigquery
client = bigquery.Client(project='my-project-name', credentials=credentials)
#print(client)
query_job = client.query("SELECT * FROM `my-project-name.test_dataset.fruits`")
results = query_job.result()
for row in results:
print("{} : {}".format(row.name, row.quantity))
bodyjs = json.loads(response.body.decode())
#print(bodyjs)
user_email = username = bodyjs['email']
user_email_domain = user_email.split('@')[1]
if not bodyjs['verified_email']:
self.log.warning("Google OAuth unverified email attempt: %s", user_email)
raise HTTPError(403,
"Google email {} not verified".format(user_email)
)
if self.hosted_domain:
if (
user_email_domain not in self.hosted_domain or
bodyjs['hd'] not in self.hosted_domain
):
self.log.warning("Google OAuth unauthorized domain attempt: %s", user_email)
raise HTTPError(403,
"Google account domain @{} not authorized.".format(user_email_domain)
)
if len(self.hosted_domain) >= 1:
# unambiguous domain, use only base name
username = user_email.split('@')[0]
return {
'name': username,
'auth_state': {
'access_token': access_token,
'google_user': bodyjs,
}
}
class LocalGoogleOAuthenticator(LocalAuthenticator, GoogleOAuthenticator):
"""A version that mixes in local system user creation"""
pass
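I was also wondering whether I should be doing the BigQuery call inside `authenticate()` at all, or whether I should just stash the token in `auth_state` and hand it to the single-user server. Something roughly like this is what I mean (untested sketch; it assumes `enable_auth_state = True` and `JUPYTERHUB_CRYPT_KEY` are set so `auth_state` is actually persisted):

```python
class GoogleOAuthenticator(OAuthenticator, GoogleOAuth2Mixin):
    # ... authenticate() as above, returning auth_state ...

    async def pre_spawn_start(self, user, spawner):
        # Untested sketch: pass the Google token to the spawned notebook server
        # instead of querying BigQuery from the hub process.
        auth_state = await user.get_auth_state()
        if not auth_state:
            # auth_state not enabled/persisted, nothing to pass on
            return
        spawner.environment['GOOGLE_ACCESS_TOKEN'] = auth_state['access_token']
```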
When I run the following code in one of my notebooks, it makes me do an offline verification (which I'm trying to avoid!), but it works, so I think the proxy timeout is a red herring and it's just something wrong with my poor code!
```python
from google_auth_oauthlib import flow
from google.cloud import bigquery

launch_browser = False
#
# The `launch_browser` boolean variable indicates if a local server is used
# as the callback URL in the auth flow. A value of `True` is recommended,
# but a local server does not work if accessing the application remotely,
# such as over SSH or from a remote Jupyter notebook.

appflow = flow.InstalledAppFlow.from_client_secrets_file(
    'client_secret_ihubglobal.json',
    scopes=['https://www.googleapis.com/auth/bigquery'])

if launch_browser:
    appflow.run_local_server()
else:
    appflow.run_console()

credentials = appflow.credentials

client = bigquery.Client(credentials=credentials)
query_job = client.query("SELECT * FROM `my-project-name.test_dataset.fruits`")
results = query_job.result()
for row in results:
    print("{} : {}".format(row.name, row.quantity))
```
Any advice? (apart from quit!)