Hi! I am running into some strange behavior on the hub configured in this repo: https://github.com/learning-2-learn/l2lhub-deployment. This is a hubploy-deployed hub running on GCP with some extra pangeo-style scalable computing stuff.
In notebooks running on the hub, I am using gcsfs to read and write data from/to GCS.
I run:
fs = gcsfs.GCSFileSystem(project='my-project-123456', token="cloud")
And then I can run things like:
fs.ls('mybucket/myfolder/')
without any issue. I can even do things like:
fs.get(os.path.join('mybucket/myfolder/', metadata_file), metadata_file)
But I can’t do:
fs.put(metadata_file, os.path.join('mybucket/myfolder/', metadata_file))
When I try doing that, I get:
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
/srv/conda/envs/notebook/lib/python3.7/site-packages/fsspec/spec.py in put_file(self, lpath, rpath, **kwargs)
676 data = f1.read(self.blocksize)
--> 677 f2.write(data)
678
/srv/conda/envs/notebook/lib/python3.7/site-packages/fsspec/spec.py in write(self, data)
1228 if self.buffer.tell() >= self.blocksize:
-> 1229 self.flush()
1230 return out
/srv/conda/envs/notebook/lib/python3.7/site-packages/fsspec/spec.py in flush(self, force)
1263 self.offset = 0
-> 1264 self._initiate_upload()
1265
<decorator-gen-174> in _initiate_upload(self)
/srv/conda/envs/notebook/lib/python3.7/site-packages/gcsfs/core.py in _tracemethod(f, self, *args, **kwargs)
51
---> 52 return f(self, *args, **kwargs)
53
/srv/conda/envs/notebook/lib/python3.7/site-packages/gcsfs/core.py in _initiate_upload(self)
1225 uploadType="resumable",
-> 1226 json={"name": self.key, "metadata": self.metadata},
1227 )
<decorator-gen-157> in _call(self, method, path, *args, **kwargs)
/srv/conda/envs/notebook/lib/python3.7/site-packages/gcsfs/core.py in _tracemethod(f, self, *args, **kwargs)
51
---> 52 return f(self, *args, **kwargs)
53
/srv/conda/envs/notebook/lib/python3.7/site-packages/gcsfs/core.py in _call(self, method, path, *args, **kwargs)
535 )
--> 536 validate_response(r, path)
537 break
/srv/conda/envs/notebook/lib/python3.7/site-packages/gcsfs/core.py in validate_response(r, path)
172 elif r.status_code == 403:
--> 173 raise IOError("Forbidden: %s\n%s" % (path, msg))
174 elif r.status_code == 429:
OSError: Forbidden: https://www.googleapis.com/upload/storage/v1/b/learning2learn/o
Insufficient Permission
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
<ipython-input-9-a20b3a2d2a55> in <module>
----> 1 fs.put(metadata_file, op.join('learning2learn/Buffalo/NWB-experiments/', metadata_file))
/srv/conda/envs/notebook/lib/python3.7/site-packages/fsspec/spec.py in put(self, lpath, rpath, recursive, **kwargs)
696
697 for lpath, rpath in zip(lpaths, rpaths):
--> 698 self.put_file(lpath, rpath, **kwargs)
699
700 def head(self, path, size=1024):
/srv/conda/envs/notebook/lib/python3.7/site-packages/fsspec/spec.py in put_file(self, lpath, rpath, **kwargs)
675 while data:
676 data = f1.read(self.blocksize)
--> 677 f2.write(data)
678
679 def put(self, lpath, rpath, recursive=False, **kwargs):
/srv/conda/envs/notebook/lib/python3.7/site-packages/fsspec/spec.py in __exit__(self, *args)
1422
1423 def __exit__(self, *args):
-> 1424 self.close()
/srv/conda/envs/notebook/lib/python3.7/site-packages/fsspec/spec.py in close(self)
1390 else:
1391 if not self.forced:
-> 1392 self.flush(force=True)
1393
1394 if self.fs is not None:
/srv/conda/envs/notebook/lib/python3.7/site-packages/fsspec/spec.py in flush(self, force)
1264 self._initiate_upload()
1265
-> 1266 if self._upload_chunk(final=force) is not False:
1267 self.offset += self.buffer.seek(0, 2)
1268 self.buffer = io.BytesIO()
<decorator-gen-173> in _upload_chunk(self, final)
/srv/conda/envs/notebook/lib/python3.7/site-packages/gcsfs/core.py in _tracemethod(f, self, *args, **kwargs)
50 logger.log(logging.DEBUG - 1, tb_io.getvalue())
51
---> 52 return f(self, *args, **kwargs)
53
54
/srv/conda/envs/notebook/lib/python3.7/site-packages/gcsfs/core.py in _upload_chunk(self, final)
1182 )
1183 r = self.gcsfs._call(
-> 1184 "POST", self.location, uploadType="resumable", headers=head, data=data
1185 )
1186 if "Range" in r.headers:
<decorator-gen-157> in _call(self, method, path, *args, **kwargs)
/srv/conda/envs/notebook/lib/python3.7/site-packages/gcsfs/core.py in _tracemethod(f, self, *args, **kwargs)
50 logger.log(logging.DEBUG - 1, tb_io.getvalue())
51
---> 52 return f(self, *args, **kwargs)
53
54
/srv/conda/envs/notebook/lib/python3.7/site-packages/gcsfs/core.py in _call(self, method, path, *args, **kwargs)
507 r = None
508
--> 509 if not path.startswith("http"):
510 path = self.base + path
511
AttributeError: 'NoneType' object has no attribute 'startswith'
In other experiments that I have conducted (with Dask and Zarr), I run into similar authentication issues. The VMs in question do have a service account that allows them to be “Editors”, so I assumed they should be able to do this kind of thing (and I seem to remember being able to do this in the past). Does anyone have any ideas on how to debug this?