From 0472bd0a6440c49cb47da229c56ec164d6e06cd5 Mon Sep 17 00:00:00 2001 From: Wh1isper <9573586@qq.com> Date: Thu, 23 Nov 2023 11:31:37 +0800 Subject: [PATCH] Change md5 to sha256 --- docs/source/developers/contents.rst | 10 ++-- jupyter_server/services/api/api.yaml | 12 ++--- jupyter_server/services/contents/fileio.py | 16 +++--- .../services/contents/filemanager.py | 54 +++++++++---------- jupyter_server/services/contents/handlers.py | 34 ++++++------ jupyter_server/services/contents/manager.py | 14 ++--- tests/services/contents/test_api.py | 12 ++--- tests/services/contents/test_fileio.py | 14 +++-- tests/services/contents/test_manager.py | 8 +-- 9 files changed, 90 insertions(+), 84 deletions(-) diff --git a/docs/source/developers/contents.rst b/docs/source/developers/contents.rst index ca88025c88..79551eff42 100644 --- a/docs/source/developers/contents.rst +++ b/docs/source/developers/contents.rst @@ -63,7 +63,7 @@ Models may contain the following entries: | |``None`` |if any. (:ref:`See | | | |Below`) | +--------------------+-----------+------------------------------+ -|**md5** |unicode or |The md5 of the contents. | +|**sha256** |unicode or |The sha256 of the contents. | | |``None`` | | | | | | +--------------------+-----------+------------------------------+ @@ -80,7 +80,7 @@ model. There are three model types: **notebook**, **file**, and **directory**. :class:`nbformat.notebooknode.NotebookNode` representing the .ipynb file represented by the model. See the `NBFormat`_ documentation for a full description. - - The ``md5`` field a hexdigest string of the md5 value of the notebook + - The ``sha256`` field is a hexdigest string of the sha256 value of the notebook file. - ``file`` models @@ -91,14 +91,14 @@ model. There are three model types: **notebook**, **file**, and **directory**. file models, ``content`` simply contains the file's bytes after decoding as UTF-8. Non-text (``base64``) files are read as bytes, base64 encoded, and then decoded as UTF-8. 
- - The ``md5`` field a hexdigest string of the md5 value of the file. + - The ``sha256`` field is a hexdigest string of the sha256 value of the file. - ``directory`` models - The ``format`` field is always ``"json"``. - The ``mimetype`` field is always ``None``. - The ``content`` field contains a list of :ref:`content-free` models representing the entities in the directory. - - The ``md5`` field is always ``None``. + - The ``sha256`` field is always ``None``. .. note:: @@ -137,7 +137,7 @@ model. There are three model types: **notebook**, **file**, and **directory**. "path": "foo/a.ipynb", "type": "notebook", "writable": True, - "md5": "7e47382b370c05a1b14706a2a8aff91a", + "sha256": "f5e43a0b1c2e7836ab3b4d6b1c35c19e2558688de15a6a14e137a59e4715d34b", } # Notebook Model without Content diff --git a/jupyter_server/services/api/api.yaml b/jupyter_server/services/api/api.yaml index 0394093ba9..35010d02a6 100644 --- a/jupyter_server/services/api/api.yaml +++ b/jupyter_server/services/api/api.yaml @@ -106,9 +106,9 @@ paths: in: query description: "Return content (0 for no content, 1 for return content)" type: integer - - name: md5 + - name: sha256 in: query - description: "Return md5 hexdigest string of content (0 for no md5, 1 for return md5)" + description: "Return sha256 hexdigest string of content (0 for no sha256, 1 for return sha256)" type: integer responses: 404: @@ -889,7 +889,7 @@ definitions: kernel: $ref: "#/definitions/Kernel" Contents: - description: "A contents object. The content and format keys may be null if content is not contained. The md5 maybe null if md5 is not contained. If type is 'file', then the mimetype will be null." + description: "A contents object. The content and format keys may be null if content is not contained. The sha256 may be null if sha256 is not contained. If type is 'file', then the mimetype will be null." 
type: object required: - type @@ -901,7 +901,7 @@ definitions: - mimetype - format - content - - md5 + - sha256 properties: name: type: string @@ -939,9 +939,9 @@ definitions: format: type: string description: Format of content (one of null, 'text', 'base64', 'json') - md5: + sha256: type: string - description: "The md5 hexdigest string of content, if requested (otherwise null)." + description: "The sha256 hexdigest string of content, if requested (otherwise null)." Checkpoints: description: A checkpoint object. type: object diff --git a/jupyter_server/services/contents/fileio.py b/jupyter_server/services/contents/fileio.py index 5f0aa4a8bf..e8a7f9ff9f 100644 --- a/jupyter_server/services/contents/fileio.py +++ b/jupyter_server/services/contents/fileio.py @@ -357,11 +357,11 @@ def _save_file(self, os_path, content, format): with self.atomic_writing(os_path, text=False) as f: f.write(bcontent) - def _get_md5(self, os_path): + def _get_sha256(self, os_path): c, _ = self._read_file(os_path, "byte") - md5 = hashlib.md5() - md5.update(c) - return md5.hexdigest() + sha256 = hashlib.sha256() + sha256.update(c) + return sha256.hexdigest() class AsyncFileManagerMixin(FileManagerMixin): @@ -475,8 +475,8 @@ async def _save_file(self, os_path, content, format): with self.atomic_writing(os_path, text=False) as f: await run_sync(f.write, bcontent) - async def _get_md5(self, os_path): + async def _get_sha256(self, os_path): c, _ = await self._read_file(os_path, "byte") - md5 = hashlib.md5() - await run_sync(md5.update, c) - return md5.hexdigest() + sha256 = hashlib.sha256() + await run_sync(sha256.update, c) + return sha256.hexdigest() diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index 94d264e243..832413626c 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -48,7 +48,7 @@ class FileContentsManager(FileManagerMixin, ContentsManager): root_dir = 
Unicode(config=True) max_copy_folder_size_mb = Int(500, config=True, help="The max folder size that can be copied") - support_md5 = Bool(True, config=False, help="Support md5 argument in `get`") + support_sha256 = Bool(True, config=False, help="Support sha256 argument in `get`") @default("root_dir") def _default_root_dir(self): @@ -269,7 +269,7 @@ def _base_model(self, path): model["mimetype"] = None model["size"] = size model["writable"] = self.is_writable(path) - model["md5"] = None + model["sha256"] = None return model @@ -337,7 +337,7 @@ def _dir_model(self, path, content=True): return model - def _file_model(self, path, content=True, format=None, md5=False): + def _file_model(self, path, content=True, format=None, sha256=False): """Build a model for a file if content is requested, include the file contents. @@ -366,13 +366,13 @@ def _file_model(self, path, content=True, format=None, md5=False): content=content, format=format, ) - if md5: - md5 = self._get_md5(os_path) - model.update(md5=md5) + if sha256: + sha256 = self._get_sha256(os_path) + model.update(sha256=sha256) return model - def _notebook_model(self, path, content=True, md5=False): + def _notebook_model(self, path, content=True, sha256=False): """Build a notebook model if content is requested, the notebook content will be populated @@ -391,12 +391,12 @@ def _notebook_model(self, path, content=True, md5=False): model["content"] = nb model["format"] = "json" self.validate_notebook_model(model, validation_error) - if md5: - model["md5"] = self._get_md5(os_path) + if sha256: + model["sha256"] = self._get_sha256(os_path) return model - def get(self, path, content=True, type=None, format=None, md5=None): + def get(self, path, content=True, type=None, format=None, sha256=None): """Takes a path for an entity and returns its model Parameters @@ -411,8 +411,8 @@ def get(self, path, content=True, type=None, format=None, md5=None): format : str, optional The requested format for file contents. 
'text' or 'base64'. Ignored if this returns a notebook or directory model. - md5: bool, optional - Whether to include the md5 of the file contents. + sha256: bool, optional + Whether to include the sha256 of the file contents. Returns ------- @@ -440,11 +440,11 @@ def get(self, path, content=True, type=None, format=None, md5=None): ) model = self._dir_model(path, content=content) elif type == "notebook" or (type is None and path.endswith(".ipynb")): - model = self._notebook_model(path, content=content, md5=md5) + model = self._notebook_model(path, content=content, sha256=sha256) else: if type == "directory": raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type") - model = self._file_model(path, content=content, format=format, md5=md5) + model = self._file_model(path, content=content, format=format, sha256=sha256) self.emit(data={"action": "get", "path": path}) return model @@ -726,7 +726,7 @@ def _human_readable_size(self, size): class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager): """An async file contents manager.""" - support_md5 = Bool(True, config=False, help="Support md5 argument in `get`") + support_sha256 = Bool(True, config=False, help="Support sha256 argument in `get`") @default("checkpoints_class") def _checkpoints_class_default(self): @@ -797,7 +797,7 @@ async def _dir_model(self, path, content=True): return model - async def _file_model(self, path, content=True, format=None, md5=False): + async def _file_model(self, path, content=True, format=None, sha256=False): """Build a model for a file if content is requested, include the file contents. 
@@ -826,13 +826,13 @@ async def _file_model(self, path, content=True, format=None, md5=False): content=content, format=format, ) - if md5: - md5 = await self._get_md5(os_path) - model.update(md5=md5) + if sha256: + sha256 = await self._get_sha256(os_path) + model.update(sha256=sha256) return model - async def _notebook_model(self, path, content=True, md5=False): + async def _notebook_model(self, path, content=True, sha256=False): """Build a notebook model if content is requested, the notebook content will be populated @@ -851,12 +851,12 @@ async def _notebook_model(self, path, content=True, md5=False): model["content"] = nb model["format"] = "json" self.validate_notebook_model(model, validation_error) - if md5: - model["md5"] = await self._get_md5(os_path) + if sha256: + model["sha256"] = await self._get_sha256(os_path) return model - async def get(self, path, content=True, type=None, format=None, md5=False): + async def get(self, path, content=True, type=None, format=None, sha256=False): """Takes a path for an entity and returns its model Parameters @@ -871,8 +871,8 @@ async def get(self, path, content=True, type=None, format=None, md5=False): format : str, optional The requested format for file contents. 'text' or 'base64'. Ignored if this returns a notebook or directory model. - md5: bool, optional - Whether to include the md5 of the file contents. + sha256: bool, optional + Whether to include the sha256 of the file contents. 
Returns ------- @@ -895,11 +895,11 @@ async def get(self, path, content=True, type=None, format=None, md5=False): ) model = await self._dir_model(path, content=content) elif type == "notebook" or (type is None and path.endswith(".ipynb")): - model = await self._notebook_model(path, content=content, md5=md5) + model = await self._notebook_model(path, content=content, sha256=sha256) else: if type == "directory": raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type") - model = await self._file_model(path, content=content, format=format, md5=md5) + model = await self._file_model(path, content=content, format=format, sha256=sha256) self.emit(data={"action": "get", "path": path}) return model diff --git a/jupyter_server/services/contents/handlers.py b/jupyter_server/services/contents/handlers.py index 00d45ca629..3534e8207d 100644 --- a/jupyter_server/services/contents/handlers.py +++ b/jupyter_server/services/contents/handlers.py @@ -44,14 +44,14 @@ def _validate_in_or_not(expect_in_model: bool, model: Dict[str, Any], maybe_none ) -def validate_model(model, expect_content, expect_md5): +def validate_model(model, expect_content, expect_sha256): """ Validate a model returned by a ContentsManager method. If expect_content is True, then we expect non-null entries for 'content' and 'format'. - If expect_md5 is True, then we expect non-null entries for 'md5'. + If expect_sha256 is True, then we expect non-null entries for 'sha256'. 
""" required_keys = { "name", @@ -63,7 +63,7 @@ def validate_model(model, expect_content, expect_md5): "mimetype", "content", "format", - "md5", + "sha256", } missing = required_keys - set(model.keys()) if missing: @@ -73,9 +73,9 @@ def validate_model(model, expect_content, expect_md5): ) content_keys = ["content", "format"] - md5_keys = ["md5"] + sha256_keys = ["sha256"] _validate_in_or_not(expect_content, model, content_keys) - _validate_in_or_not(expect_md5, model, md5_keys) + _validate_in_or_not(expect_sha256, model, sha256_keys) class ContentsAPIHandler(APIHandler): @@ -136,10 +136,10 @@ async def get(self, path=""): raise web.HTTPError(400, "Content %r is invalid" % content_str) content = int(content_str or "") - md5_str = self.get_query_argument("md5", default="0") - if md5_str not in {"0", "1"}: - raise web.HTTPError(400, "Content %r is invalid" % md5_str) - md5 = int(md5_str or "") + sha256_str = self.get_query_argument("sha256", default="0") + if sha256_str not in {"0", "1"}: + raise web.HTTPError(400, "Content %r is invalid" % sha256_str) + sha256 = int(sha256_str or "") if not cm.allow_hidden and await ensure_async(cm.is_hidden(path)): await self._finish_error( @@ -151,12 +151,12 @@ async def get(self, path=""): "format": format, "content": content, } - if cm.support_md5: - kwargs["md5"] = md5 + if cm.support_sha256: + kwargs["sha256"] = sha256 try: model = await ensure_async(self.contents_manager.get(**kwargs)) - validate_model(model, expect_content=content, expect_md5=md5) + validate_model(model, expect_content=content, expect_sha256=sha256) self._finish_model(model, location=False) except web.HTTPError as exc: # 404 is okay in this context, catch exception and return 404 code to prevent stack trace on client @@ -186,7 +186,7 @@ async def patch(self, path=""): raise web.HTTPError(400, f"Cannot rename file or directory {path!r}") model = await ensure_async(cm.update(model, path)) - validate_model(model, expect_content=False, expect_md5=False) + 
validate_model(model, expect_content=False, expect_sha256=False) self._finish_model(model) async def _copy(self, copy_from, copy_to=None): @@ -199,7 +199,7 @@ async def _copy(self, copy_from, copy_to=None): ) model = await ensure_async(self.contents_manager.copy(copy_from, copy_to)) self.set_status(201) - validate_model(model, expect_content=False, expect_md5=False) + validate_model(model, expect_content=False, expect_sha256=False) self._finish_model(model) async def _upload(self, model, path): @@ -207,7 +207,7 @@ async def _upload(self, model, path): self.log.info("Uploading file to %s", path) model = await ensure_async(self.contents_manager.new(model, path)) self.set_status(201) - validate_model(model, expect_content=False, expect_md5=False) + validate_model(model, expect_content=False, expect_sha256=False) self._finish_model(model) async def _new_untitled(self, path, type="", ext=""): @@ -217,7 +217,7 @@ async def _new_untitled(self, path, type="", ext=""): self.contents_manager.new_untitled(path=path, type=type, ext=ext) ) self.set_status(201) - validate_model(model, expect_content=False, expect_md5=False) + validate_model(model, expect_content=False, expect_sha256=False) self._finish_model(model) async def _save(self, model, path): @@ -226,7 +226,7 @@ async def _save(self, model, path): if not chunk or chunk == -1: # Avoid tedious log information self.log.info("Saving file at %s", path) model = await ensure_async(self.contents_manager.save(model, path)) - validate_model(model, expect_content=False, expect_md5=False) + validate_model(model, expect_content=False, expect_sha256=False) self._finish_model(model) @web.authenticated diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py index 5bf1a131bb..382154b7f3 100644 --- a/jupyter_server/services/contents/manager.py +++ b/jupyter_server/services/contents/manager.py @@ -111,7 +111,7 @@ def _validate_preferred_dir(self, proposal): return value allow_hidden = 
Bool(False, config=True, help="Allow access to hidden files") - support_md5 = Bool(False, config=False, help="Support md5 argument in `get`") + support_sha256 = Bool(False, config=False, help="Support sha256 argument in `get`") notary = Instance(sign.NotebookNotary) @@ -452,9 +452,9 @@ def get(self, path, content=True, type=None, format=None): """ Get a file or directory model. - If a ContentManager supports calculating the md5 value of a file, - `ContentManager.support_md5` should be True and this function will accept an `md5` parameter, - will return a dict with an `md5` key. + If a ContentManager supports calculating the sha256 value of a file, + `ContentManager.support_sha256` should be True and this function will accept an `sha256` parameter, + will return a dict with an `sha256` key. """ raise NotImplementedError @@ -860,9 +860,9 @@ async def get(self, path, content=True, type=None, format=None): """ Get a file or directory model. - If a ContentManager supports calculating the md5 value of a file, - ContentManager.support_md5 should be True and this function will accept an md5 parameter, - will return a dict with an 'md5' key. + If a ContentManager supports calculating the sha256 value of a file, + ContentManager.support_sha256 should be True and this function will accept an sha256 parameter, + will return a dict with an 'sha256' key. 
""" raise NotImplementedError diff --git a/tests/services/contents/test_api.py b/tests/services/contents/test_api.py index eb93fd7526..4d75d4ce66 100644 --- a/tests/services/contents/test_api.py +++ b/tests/services/contents/test_api.py @@ -103,15 +103,15 @@ async def test_get_nb_contents(jp_fetch, contents, path, name): @pytest.mark.parametrize("path,name", dirs) -async def test_get_nb_md5(jp_fetch, contents, path, name): +async def test_get_nb_sha256(jp_fetch, contents, path, name): nbname = name + ".ipynb" nbpath = (path + "/" + nbname).lstrip("/") - r = await jp_fetch("api", "contents", nbpath, method="GET", params=dict(md5="1")) + r = await jp_fetch("api", "contents", nbpath, method="GET", params=dict(sha256="1")) model = json.loads(r.body.decode()) assert model["name"] == nbname assert model["path"] == nbpath assert model["type"] == "notebook" - assert "md5" in model + assert "sha256" in model assert "metadata" in model["content"] assert isinstance(model["content"]["metadata"], dict) @@ -201,14 +201,14 @@ async def test_get_text_file_contents(jp_fetch, contents, path, name): @pytest.mark.parametrize("path,name", dirs) -async def test_get_text_file_md5(jp_fetch, contents, path, name): +async def test_get_text_file_sha256(jp_fetch, contents, path, name): txtname = name + ".txt" txtpath = (path + "/" + txtname).lstrip("/") - r = await jp_fetch("api", "contents", txtpath, method="GET", params=dict(md5="1")) + r = await jp_fetch("api", "contents", txtpath, method="GET", params=dict(sha256="1")) model = json.loads(r.body.decode()) assert model["name"] == txtname assert model["path"] == txtpath - assert "md5" in model + assert "sha256" in model assert model["format"] == "text" assert model["type"] == "file" diff --git a/tests/services/contents/test_fileio.py b/tests/services/contents/test_fileio.py index 19060db94a..6ec46f5d00 100644 --- a/tests/services/contents/test_fileio.py +++ b/tests/services/contents/test_fileio.py @@ -142,8 +142,11 @@ def 
test_file_manager_mixin(tmpdir): mixin.log = logging.getLogger() bad_content = tmpdir / "bad_content.ipynb" bad_content.write_text("{}", "utf8") - # Same as `echo -n {} | md5sum` - assert mixin._get_md5(bad_content) == "99914b932bd37a50b983c5e7c90ae93b" + # Same as `echo -n {} | sha256sum` + assert ( + mixin._get_sha256(bad_content) + == "44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a" + ) with pytest.raises(HTTPError): mixin._read_notebook(bad_content) other = path_to_intermediate(bad_content) @@ -166,8 +169,11 @@ async def test_async_file_manager_mixin(tmpdir): mixin.log = logging.getLogger() bad_content = tmpdir / "bad_content.ipynb" bad_content.write_text("{}", "utf8") - # Same as `echo -n {} | md5sum` - assert await mixin._get_md5(bad_content) == "99914b932bd37a50b983c5e7c90ae93b" + # Same as `echo -n {} | sha256sum` + assert ( + await mixin._get_sha256(bad_content) + == "44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a" + ) with pytest.raises(HTTPError): await mixin._read_notebook(bad_content) other = path_to_intermediate(bad_content) diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index 3d11a43ad0..62377041e8 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -571,8 +571,8 @@ async def test_get(jp_contents_manager): nb_as_bin_file = await ensure_async(cm.get(path, content=True, type="file", format="base64")) assert nb_as_bin_file["format"] == "base64" - nb_with_md5 = await ensure_async(cm.get(path, md5=True)) - assert nb_with_md5["md5"] + nb_with_sha256 = await ensure_async(cm.get(path, sha256=True)) + assert nb_with_sha256["sha256"] # Test in sub-directory sub_dir = "/foo/" @@ -588,7 +588,7 @@ async def test_get(jp_contents_manager): # Test with a regular file. 
file_model_path = (await ensure_async(cm.new_untitled(path=sub_dir, ext=".txt")))["path"] - file_model = await ensure_async(cm.get(file_model_path, md5=True)) + file_model = await ensure_async(cm.get(file_model_path, sha256=True)) expected_model = { "content": "", "format": "text", @@ -603,7 +603,7 @@ async def test_get(jp_contents_manager): assert file_model[key] == value assert "created" in file_model assert "last_modified" in file_model - assert "md5" in file_model + assert "sha256" in file_model # Create a sub-sub directory to test getting directory contents with a # subdir.