diff --git a/.github/workflows/fetch.yml.disabled b/.github/workflows/fetch.yml.disabled
new file mode 100644
index 00000000..72a41adb
--- /dev/null
+++ b/.github/workflows/fetch.yml.disabled
@@ -0,0 +1,46 @@
+name: Fetch Data
+
+on:
+  schedule:
+    # At :15 past hours 1, 5, 9, 13, 17, 21, and 23 UTC, days 1-20, first month of each quarter (Jan, Apr, Jul, Oct)
+    - cron: '15 1,5,9,13,17,21,23 1-20 1,4,7,10 *'
+  workflow_dispatch:
+
+jobs:
+  fetch:
+    runs-on: ubuntu-latest
+
+    env:
+      GCS_DEVELOPER_KEY: ${{ secrets.GCS_DEVELOPER_KEY }}
+      GCS_CX: ${{ secrets.GCS_CX }}
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install pipenv
+          pipenv sync --system
+
+      - name: Set PYTHONPATH
+        run: echo "PYTHONPATH=./scripts" >> $GITHUB_ENV
+
+      - name: Run fetch script
+        run: |
+          # python scripts/1-fetch/deviantart_fetched.py
+          # python scripts/1-fetch/flickr_fetched.py
+          # python scripts/1-fetch/github_fetched.py
+          # python scripts/1-fetch/internetarchive_fetched.py
+          # python scripts/1-fetch/metmuseum_fetched.py
+          # python scripts/1-fetch/vimeo_fetched.py
+          # python scripts/1-fetch/wikicommons_fetched.py
+          # python scripts/1-fetch/wikipedia_fetched.py
+          # python scripts/1-fetch/youtube_fetched.py
+          python scripts/1-fetch/gcs_fetched.py
diff --git a/.github/workflows/process.yml.disabled b/.github/workflows/process.yml.disabled
new file mode 100644
index 00000000..39318d6b
--- /dev/null
+++ b/.github/workflows/process.yml.disabled
@@ -0,0 +1,42 @@
+name: Process Data
+
+on:
+  schedule:
+    # At :15 past hours 1, 5, 9, 13, 17, 21, and 23 UTC, days 1-20, second month of each quarter (Feb, May, Aug, Nov)
+    - cron: '15 1,5,9,13,17,21,23 1-20 2,5,8,11 *'
+  workflow_dispatch:
+
+jobs:
+  process:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install pipenv
+          pipenv sync --system
+
+      - name: Set PYTHONPATH
+        run: echo "PYTHONPATH=./scripts" >> $GITHUB_ENV
+
+      - name: Run process script
+        run: |
+          # python scripts/2-process/deviantart_processed.py
+          # python scripts/2-process/flickr_processed.py
+          # python scripts/2-process/github_processed.py
+          # python scripts/2-process/internetarchive_processed.py
+          # python scripts/2-process/metmuseum_processed.py
+          # python scripts/2-process/vimeo_processed.py
+          # python scripts/2-process/wikicommons_processed.py
+          # python scripts/2-process/wikipedia_processed.py
+          # python scripts/2-process/youtube_processed.py
+          python scripts/2-process/gcs_processed.py
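Taken together with the report schedule below, the three cron expressions stagger the pipeline across each quarter: fetch in the first month, process in the second, report in the third, with runs confined to days 1-20 of the month. A minimal sketch of that month-to-phase mapping follows; `phase_for_month` is a hypothetical helper for illustration, not part of this changeset.

```python
# Hypothetical helper (not in this PR): which pipeline phase do the three
# cron schedules assign to a given calendar month?
PHASES = ("fetch", "process", "report")

def phase_for_month(month: int) -> str:
    """Map a calendar month (1-12) to the scheduled workflow phase."""
    return PHASES[(month - 1) % 3]

assert phase_for_month(1) == "fetch"    # January: first month of Q1
assert phase_for_month(5) == "process"  # May: second month of Q2
assert phase_for_month(12) == "report"  # December: third month of Q4
```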
diff --git a/.github/workflows/report.yml.disabled b/.github/workflows/report.yml.disabled
new file mode 100644
index 00000000..1c5938dd
--- /dev/null
+++ b/.github/workflows/report.yml.disabled
@@ -0,0 +1,43 @@
+name: Generate Report
+
+on:
+  schedule:
+    # At :15 past hours 1, 5, 9, 13, 17, 21, and 23 UTC, days 1-20, third month of each quarter (Mar, Jun, Sep, Dec)
+    - cron: '15 1,5,9,13,17,21,23 1-20 3,6,9,12 *'
+  workflow_dispatch:
+
+
+jobs:
+  generate-report:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install pipenv
+          pipenv sync --system
+
+      - name: Set PYTHONPATH
+        run: echo "PYTHONPATH=./scripts" >> $GITHUB_ENV
+
+      - name: Run report script
+        run: |
+          # python scripts/3-report/deviantart_reports.py
+          # python scripts/3-report/flickr_reports.py
+          # python scripts/3-report/github_reports.py
+          # python scripts/3-report/internetarchive_reports.py
+          # python scripts/3-report/metmuseum_reports.py
+          # python scripts/3-report/vimeo_reports.py
+          # python scripts/3-report/wikicommons_reports.py
+          # python scripts/3-report/wikipedia_reports.py
+          # python scripts/3-report/youtube_reports.py
+          python scripts/3-report/gcs_reports.py
diff --git a/Pipfile b/Pipfile
index 24a16c8d..d4687990 100644
--- a/Pipfile
+++ b/Pipfile
@@ -5,6 +5,8 @@ name = "pypi"
 
 [packages]
 flickrapi = "*"
+GitPython = "*"
+google-api-python-client = "*"
 internetarchive = "*"
 jupyterlab = ">=3.6.7"
 matplotlib = "*"
@@ -27,3 +29,9 @@ pre-commit = "*"
 
 [requires]
 python_version = "3.11"
+
+[scripts]
+gcs_fetched = "./scripts/1-fetch/gcs_fetched.py"
+flickr_fetched = "./scripts/1-fetch/flickr_fetched.py"
+gcs_processed = "./scripts/2-process/gcs_processed.py"
+gcs_reports = "./scripts/3-report/gcs_reports.py"
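For context: the new `google-api-python-client` dependency pairs with the `GCS_DEVELOPER_KEY` and `GCS_CX` secrets that fetch.yml exports, and the `[scripts]` section above also makes each stage invocable directly, e.g. `pipenv run gcs_fetched`. Below is a minimal, illustrative sketch of a Google Custom Search call under those assumptions; the real logic lives in `scripts/1-fetch/gcs_fetched.py`, which this diff does not show, and `cse_result_count` is a hypothetical name.

```python
import os

from googleapiclient.discovery import build  # provided by google-api-python-client

def cse_result_count(query: str) -> int:
    """Return the estimated result total for one Custom Search query."""
    service = build(
        "customsearch",
        "v1",
        developerKey=os.environ["GCS_DEVELOPER_KEY"],  # injected via the workflow env block
    )
    response = service.cse().list(q=query, cx=os.environ["GCS_CX"], num=1).execute()
    # The API returns totalResults as a string, hence the int() conversion.
    return int(response["searchInformation"]["totalResults"])
```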
"sha256:00e5388f71c1a0610e6fe56b5c44ab7ba14165cdd6d695429c5cd94021e390b2", @@ -360,31 +360,31 @@ }, "debugpy": { "hashes": [ - "sha256:016a9fcfc2c6b57f939673c874310d8581d51a0fe0858e7fac4e240c5eb743cb", - "sha256:0de56aba8249c28a300bdb0672a9b94785074eb82eb672db66c8144fff673146", - "sha256:1a9fe0829c2b854757b4fd0a338d93bc17249a3bf69ecf765c61d4c522bb92a8", - "sha256:28acbe2241222b87e255260c76741e1fbf04fdc3b6d094fcf57b6c6f75ce1242", - "sha256:3a79c6f62adef994b2dbe9fc2cc9cc3864a23575b6e387339ab739873bea53d0", - "sha256:3bda0f1e943d386cc7a0e71bfa59f4137909e2ed947fb3946c506e113000f741", - "sha256:3ebb70ba1a6524d19fa7bb122f44b74170c447d5746a503e36adc244a20ac539", - "sha256:58911e8521ca0c785ac7a0539f1e77e0ce2df753f786188f382229278b4cdf23", - "sha256:6df9aa9599eb05ca179fb0b810282255202a66835c6efb1d112d21ecb830ddd3", - "sha256:7a3afa222f6fd3d9dfecd52729bc2e12c93e22a7491405a0ecbf9e1d32d45b39", - "sha256:7eb7bd2b56ea3bedb009616d9e2f64aab8fc7000d481faec3cd26c98a964bcdd", - "sha256:92116039b5500633cc8d44ecc187abe2dfa9b90f7a82bbf81d079fcdd506bae9", - "sha256:a2e658a9630f27534e63922ebf655a6ab60c370f4d2fc5c02a5b19baf4410ace", - "sha256:bfb20cb57486c8e4793d41996652e5a6a885b4d9175dd369045dad59eaacea42", - "sha256:caad2846e21188797a1f17fc09c31b84c7c3c23baf2516fed5b40b378515bbf0", - "sha256:d915a18f0597ef685e88bb35e5d7ab968964b7befefe1aaea1eb5b2640b586c7", - "sha256:dda73bf69ea479c8577a0448f8c707691152e6c4de7f0c4dec5a4bc11dee516e", - "sha256:e38beb7992b5afd9d5244e96ad5fa9135e94993b0c551ceebf3fe1a5d9beb234", - "sha256:edcc9f58ec0fd121a25bc950d4578df47428d72e1a0d66c07403b04eb93bcf98", - "sha256:efd3fdd3f67a7e576dd869c184c5dd71d9aaa36ded271939da352880c012e703", - "sha256:f696d6be15be87aef621917585f9bb94b1dc9e8aced570db1b8a6fc14e8f9b42", - "sha256:fd97ed11a4c7f6d042d320ce03d83b20c3fb40da892f994bc041bbc415d7a099" - ], - "markers": "python_version >= '3.8'", - "version": "==1.8.1" + "sha256:0600faef1d0b8d0e85c816b8bb0cb90ed94fc611f308d5fde28cb8b3d2ff0fe3", + "sha256:1523bc551e28e15147815d1397afc150ac99dbd3a8e64641d53425dba57b0ff9", + "sha256:15bc2f4b0f5e99bf86c162c91a74c0631dbd9cef3c6a1d1329c946586255e859", + "sha256:16c8dcab02617b75697a0a925a62943e26a0330da076e2a10437edd9f0bf3755", + "sha256:16e16df3a98a35c63c3ab1e4d19be4cbc7fdda92d9ddc059294f18910928e0ca", + "sha256:2cbd4d9a2fc5e7f583ff9bf11f3b7d78dfda8401e8bb6856ad1ed190be4281ad", + "sha256:3f8c3f7c53130a070f0fc845a0f2cee8ed88d220d6b04595897b66605df1edd6", + "sha256:40f062d6877d2e45b112c0bbade9a17aac507445fd638922b1a5434df34aed02", + "sha256:5a019d4574afedc6ead1daa22736c530712465c0c4cd44f820d803d937531b2d", + "sha256:5d3ccd39e4021f2eb86b8d748a96c766058b39443c1f18b2dc52c10ac2757835", + "sha256:62658aefe289598680193ff655ff3940e2a601765259b123dc7f89c0239b8cd3", + "sha256:7ee2e1afbf44b138c005e4380097d92532e1001580853a7cb40ed84e0ef1c3d2", + "sha256:7f8d57a98c5a486c5c7824bc0b9f2f11189d08d73635c326abef268f83950326", + "sha256:8a13417ccd5978a642e91fb79b871baded925d4fadd4dfafec1928196292aa0a", + "sha256:95378ed08ed2089221896b9b3a8d021e642c24edc8fef20e5d4342ca8be65c00", + "sha256:acdf39855f65c48ac9667b2801234fc64d46778021efac2de7e50907ab90c634", + "sha256:bd11fe35d6fd3431f1546d94121322c0ac572e1bfb1f6be0e9b8655fb4ea941e", + "sha256:c78ba1680f1015c0ca7115671fe347b28b446081dada3fedf54138f44e4ba031", + "sha256:cf327316ae0c0e7dd81eb92d24ba8b5e88bb4d1b585b5c0d32929274a66a5210", + "sha256:d3408fddd76414034c02880e891ea434e9a9cf3a69842098ef92f6e809d09afa", + "sha256:e24ccb0cd6f8bfaec68d577cb49e9c680621c336f347479b3fce060ba7c09ec1", + 
"sha256:f179af1e1bd4c88b0b9f0fa153569b24f6b6f3de33f94703336363ae62f4bf47" + ], + "markers": "python_version >= '3.8'", + "version": "==1.8.2" }, "decorator": { "hashes": [ @@ -418,10 +418,10 @@ }, "fastjsonschema": { "hashes": [ - "sha256:3672b47bc94178c9f23dbb654bf47440155d4db9df5f7bc47643315f9c405cd0", - "sha256:e3126a94bdc4623d3de4485f8d468a12f02a67921315ddc87836d6e456dc789d" + "sha256:3d48fc5300ee96f5d116f10fe6f28d938e6008f59a6a025c2649475b87f76a23", + "sha256:5875f0b0fa7a0043a91e93a9b8f793bcbbba9691e7fd83dca95c28ba26d21f0a" ], - "version": "==2.19.1" + "version": "==2.20.0" }, "flickrapi": { "hashes": [ @@ -433,51 +433,51 @@ }, "fonttools": { "hashes": [ - "sha256:0118ef998a0699a96c7b28457f15546815015a2710a1b23a7bf6c1be60c01636", - "sha256:0d145976194a5242fdd22df18a1b451481a88071feadf251221af110ca8f00ce", - "sha256:0e19bd9e9964a09cd2433a4b100ca7f34e34731e0758e13ba9a1ed6e5468cc0f", - "sha256:0f08c901d3866a8905363619e3741c33f0a83a680d92a9f0e575985c2634fcc1", - "sha256:1250e818b5f8a679ad79660855528120a8f0288f8f30ec88b83db51515411fcc", - "sha256:15c94eeef6b095831067f72c825eb0e2d48bb4cea0647c1b05c981ecba2bf39f", - "sha256:1621ee57da887c17312acc4b0e7ac30d3a4fb0fec6174b2e3754a74c26bbed1e", - "sha256:180194c7fe60c989bb627d7ed5011f2bef1c4d36ecf3ec64daec8302f1ae0716", - "sha256:278e50f6b003c6aed19bae2242b364e575bcb16304b53f2b64f6551b9c000e15", - "sha256:32b17504696f605e9e960647c5f64b35704782a502cc26a37b800b4d69ff3c77", - "sha256:3bee3f3bd9fa1d5ee616ccfd13b27ca605c2b4270e45715bd2883e9504735034", - "sha256:4060acc2bfa2d8e98117828a238889f13b6f69d59f4f2d5857eece5277b829ba", - "sha256:54dcf21a2f2d06ded676e3c3f9f74b2bafded3a8ff12f0983160b13e9f2fb4a7", - "sha256:56fc244f2585d6c00b9bcc59e6593e646cf095a96fe68d62cd4da53dd1287b55", - "sha256:599bdb75e220241cedc6faebfafedd7670335d2e29620d207dd0378a4e9ccc5a", - "sha256:5f6bc991d1610f5c3bbe997b0233cbc234b8e82fa99fc0b2932dc1ca5e5afec0", - "sha256:60a3409c9112aec02d5fb546f557bca6efa773dcb32ac147c6baf5f742e6258b", - "sha256:68b3fb7775a923be73e739f92f7e8a72725fd333eab24834041365d2278c3671", - "sha256:76f1777d8b3386479ffb4a282e74318e730014d86ce60f016908d9801af9ca2a", - "sha256:806e7912c32a657fa39d2d6eb1d3012d35f841387c8fc6cf349ed70b7c340039", - "sha256:84d7751f4468dd8cdd03ddada18b8b0857a5beec80bce9f435742abc9a851a74", - "sha256:865a58b6e60b0938874af0968cd0553bcd88e0b2cb6e588727117bd099eef836", - "sha256:8ac27f436e8af7779f0bb4d5425aa3535270494d3bc5459ed27de3f03151e4c2", - "sha256:8b4850fa2ef2cfbc1d1f689bc159ef0f45d8d83298c1425838095bf53ef46308", - "sha256:8b5ad456813d93b9c4b7ee55302208db2b45324315129d85275c01f5cb7e61a2", - "sha256:8e2f1a4499e3b5ee82c19b5ee57f0294673125c65b0a1ff3764ea1f9db2f9ef5", - "sha256:9696fe9f3f0c32e9a321d5268208a7cc9205a52f99b89479d1b035ed54c923f1", - "sha256:96a48e137c36be55e68845fc4284533bda2980f8d6f835e26bca79d7e2006438", - "sha256:a8feca65bab31479d795b0d16c9a9852902e3a3c0630678efb0b2b7941ea9c74", - "sha256:aefa011207ed36cd280babfaa8510b8176f1a77261833e895a9d96e57e44802f", - "sha256:b2b92381f37b39ba2fc98c3a45a9d6383bfc9916a87d66ccb6553f7bdd129097", - "sha256:b3c61423f22165541b9403ee39874dcae84cd57a9078b82e1dce8cb06b07fa2e", - "sha256:b5b48a1121117047d82695d276c2af2ee3a24ffe0f502ed581acc2673ecf1037", - "sha256:c18b49adc721a7d0b8dfe7c3130c89b8704baf599fb396396d07d4aa69b824a1", - "sha256:c5b8cab0c137ca229433570151b5c1fc6af212680b58b15abd797dcdd9dd5051", - "sha256:c7e91abdfae1b5c9e3a543f48ce96013f9a08c6c9668f1e6be0beabf0a569c1b", - "sha256:cadf4e12a608ef1d13e039864f484c8a968840afa0258b0b843a0556497ea9ed", - 
"sha256:dc0673361331566d7a663d7ce0f6fdcbfbdc1f59c6e3ed1165ad7202ca183c68", - "sha256:de7c29bdbdd35811f14493ffd2534b88f0ce1b9065316433b22d63ca1cd21f14", - "sha256:e9d9298be7a05bb4801f558522adbe2feea1b0b103d5294ebf24a92dd49b78e5", - "sha256:ee1af4be1c5afe4c96ca23badd368d8dc75f611887fb0c0dac9f71ee5d6f110e", - "sha256:f7e89853d8bea103c8e3514b9f9dc86b5b4120afb4583b57eb10dfa5afbe0936" - ], - "markers": "python_version >= '3.8'", - "version": "==4.51.0" + "sha256:02569e9a810f9d11f4ae82c391ebc6fb5730d95a0657d24d754ed7763fb2d122", + "sha256:0679a30b59d74b6242909945429dbddb08496935b82f91ea9bf6ad240ec23397", + "sha256:10f5e6c3510b79ea27bb1ebfcc67048cde9ec67afa87c7dd7efa5c700491ac7f", + "sha256:2af40ae9cdcb204fc1d8f26b190aa16534fcd4f0df756268df674a270eab575d", + "sha256:32f029c095ad66c425b0ee85553d0dc326d45d7059dbc227330fc29b43e8ba60", + "sha256:35250099b0cfb32d799fb5d6c651220a642fe2e3c7d2560490e6f1d3f9ae9169", + "sha256:3b3c8ebafbee8d9002bd8f1195d09ed2bd9ff134ddec37ee8f6a6375e6a4f0e8", + "sha256:4824c198f714ab5559c5be10fd1adf876712aa7989882a4ec887bf1ef3e00e31", + "sha256:5ff7e5e9bad94e3a70c5cd2fa27f20b9bb9385e10cddab567b85ce5d306ea923", + "sha256:651390c3b26b0c7d1f4407cad281ee7a5a85a31a110cbac5269de72a51551ba2", + "sha256:6e08f572625a1ee682115223eabebc4c6a2035a6917eac6f60350aba297ccadb", + "sha256:6ed170b5e17da0264b9f6fae86073be3db15fa1bd74061c8331022bca6d09bab", + "sha256:73379d3ffdeecb376640cd8ed03e9d2d0e568c9d1a4e9b16504a834ebadc2dfb", + "sha256:75a157d8d26c06e64ace9df037ee93a4938a4606a38cb7ffaf6635e60e253b7a", + "sha256:791b31ebbc05197d7aa096bbc7bd76d591f05905d2fd908bf103af4488e60670", + "sha256:7b6b35e52ddc8fb0db562133894e6ef5b4e54e1283dff606fda3eed938c36fc8", + "sha256:84ec3fb43befb54be490147b4a922b5314e16372a643004f182babee9f9c3407", + "sha256:8959a59de5af6d2bec27489e98ef25a397cfa1774b375d5787509c06659b3671", + "sha256:9dfdae43b7996af46ff9da520998a32b105c7f098aeea06b2226b30e74fbba88", + "sha256:9e6ceba2a01b448e36754983d376064730690401da1dd104ddb543519470a15f", + "sha256:9efd176f874cb6402e607e4cc9b4a9cd584d82fc34a4b0c811970b32ba62501f", + "sha256:a1c7c5aa18dd3b17995898b4a9b5929d69ef6ae2af5b96d585ff4005033d82f0", + "sha256:aae7bd54187e8bf7fd69f8ab87b2885253d3575163ad4d669a262fe97f0136cb", + "sha256:b21952c092ffd827504de7e66b62aba26fdb5f9d1e435c52477e6486e9d128b2", + "sha256:b96cd370a61f4d083c9c0053bf634279b094308d52fdc2dd9a22d8372fdd590d", + "sha256:becc5d7cb89c7b7afa8321b6bb3dbee0eec2b57855c90b3e9bf5fb816671fa7c", + "sha256:bee32ea8765e859670c4447b0817514ca79054463b6b79784b08a8df3a4d78e3", + "sha256:c6e7170d675d12eac12ad1a981d90f118c06cf680b42a2d74c6c931e54b50719", + "sha256:c818c058404eb2bba05e728d38049438afd649e3c409796723dfc17cd3f08749", + "sha256:c8696544c964500aa9439efb6761947393b70b17ef4e82d73277413f291260a4", + "sha256:c9cd19cf4fe0595ebdd1d4915882b9440c3a6d30b008f3cc7587c1da7b95be5f", + "sha256:d4d0096cb1ac7a77b3b41cd78c9b6bc4a400550e21dc7a92f2b5ab53ed74eb02", + "sha256:d92d3c2a1b39631a6131c2fa25b5406855f97969b068e7e08413325bc0afba58", + "sha256:da33440b1413bad53a8674393c5d29ce64d8c1a15ef8a77c642ffd900d07bfe1", + "sha256:e013aae589c1c12505da64a7d8d023e584987e51e62006e1bb30d72f26522c41", + "sha256:e128778a8e9bc11159ce5447f76766cefbd876f44bd79aff030287254e4752c4", + "sha256:e54f1bba2f655924c1138bbc7fa91abd61f45c68bd65ab5ed985942712864bbb", + "sha256:e5b708073ea3d684235648786f5f6153a48dc8762cdfe5563c57e80787c29fbb", + "sha256:e8bf06b94694251861ba7fdeea15c8ec0967f84c3d4143ae9daf42bbc7717fe3", + "sha256:f08df60fbd8d289152079a65da4e66a447efc1d5d5a4d3f299cdd39e3b2e4a7d", + 
"sha256:f1f8758a2ad110bd6432203a344269f445a2907dc24ef6bccfd0ac4e14e0d71d", + "sha256:f677ce218976496a587ab17140da141557beb91d2a5c1a14212c994093f2eae2" + ], + "markers": "python_version >= '3.8'", + "version": "==4.53.1" }, "fqdn": { "hashes": [ @@ -486,6 +486,63 @@ ], "version": "==1.5.1" }, + "gitdb": { + "hashes": [ + "sha256:81a3407ddd2ee8df444cbacea00e2d038e40150acfa3001696fe0dcf1d3adfa4", + "sha256:bf5421126136d6d0af55bc1e7c1af1c397a34f5b7bd79e776cd3e89785c2b04b" + ], + "markers": "python_version >= '3.7'", + "version": "==4.0.11" + }, + "gitpython": { + "hashes": [ + "sha256:35f314a9f878467f5453cc1fee295c3e18e52f1b99f10f6cf5b1682e968a9e7c", + "sha256:eec7ec56b92aad751f9912a73404bc02ba212a23adb2c7098ee668417051a1ff" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==3.1.43" + }, + "google-api-core": { + "hashes": [ + "sha256:f12a9b8309b5e21d92483bbd47ce2c445861ec7d269ef6784ecc0ea8c1fa6125", + "sha256:f4695f1e3650b316a795108a76a1c416e6afb036199d1c1f1f110916df479ffd" + ], + "markers": "python_version >= '3.7'", + "version": "==2.19.1" + }, + "google-api-python-client": { + "hashes": [ + "sha256:1850a92505d91a82e2ca1635ab2b8dff179f4b67082c2651e1db332e8039840c", + "sha256:ed4bc3abe2c060a87412465b4e8254620bbbc548eefc5388e2c5ff912d36a68b" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==2.139.0" + }, + "google-auth": { + "hashes": [ + "sha256:49315be72c55a6a37d62819e3573f6b416aca00721f7e3e31a008d928bf64022", + "sha256:53326ea2ebec768070a94bee4e1b9194c9646ea0c2bd72422785bd0f9abfad7b" + ], + "markers": "python_version >= '3.7'", + "version": "==2.32.0" + }, + "google-auth-httplib2": { + "hashes": [ + "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05", + "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d" + ], + "version": "==0.2.0" + }, + "googleapis-common-protos": { + "hashes": [ + "sha256:27a2499c7e8aff199665b22741997e485eccc8645aa9176c7c988e6fae507945", + "sha256:27c5abdffc4911f28101e635de1533fb4cfd2c37fbaa9174587c799fac90aa87" + ], + "markers": "python_version >= '3.7'", + "version": "==1.63.2" + }, "h11": { "hashes": [ "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", @@ -502,6 +559,14 @@ "markers": "python_version >= '3.8'", "version": "==1.0.5" }, + "httplib2": { + "hashes": [ + "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc", + "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==0.22.0" + }, "httpx": { "hashes": [ "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5", @@ -520,28 +585,28 @@ }, "internetarchive": { "hashes": [ - "sha256:8e897ee1c054edff40744e65e16d697dbfae26b0645a5fc7762cbaf88aff87b7", - "sha256:e3826b3ff59fa516eb3c6c55edbe6acbece6dfaad79e3fb9109860962c14c6c2" + "sha256:0a0e30ade737f7be971b31c38b8c856867a316c5b1d646f055b2b1946aa9cb00", + "sha256:467188386218d2c77815ca798a51dd18310b326841113bb462b24ea9a71beedc" ], "index": "pypi", "markers": "python_version >= '3.7'", - "version": "==4.0.1" + "version": "==4.1.0" }, "ipykernel": { "hashes": [ - "sha256:1181e653d95c6808039c509ef8e67c4126b3b3af7781496c7cbfb5ed938a27da", - "sha256:3d44070060f9475ac2092b760123fadf105d2e2493c24848b6691a7c4f42af5c" + "sha256:afdb66ba5aa354b09b91379bac28ae4afebbb30e8b39510c9690afb7a10421b5", + "sha256:f093a22c4a40f8828f8e330a9c297cb93dcab13bd9678ded6de8e5cf81c56215" ], "markers": 
"python_version >= '3.8'", - "version": "==6.29.4" + "version": "==6.29.5" }, "ipython": { "hashes": [ - "sha256:07232af52a5ba146dc3372c7bf52a0f890a23edf38d77caef8d53f9cdc2584c1", - "sha256:7468edaf4f6de3e1b912e57f66c241e6fd3c7099f2ec2136e239e142e800274d" + "sha256:1cec0fbba8404af13facebe83d04436a7434c7400e59f47acf467c64abd0956c", + "sha256:e6b347c27bdf9c32ee9d31ae85defc525755a1869f14057e900675b9e8d6e6ff" ], "markers": "python_version >= '3.10'", - "version": "==8.23.0" + "version": "==8.26.0" }, "isoduration": { "hashes": [ @@ -560,11 +625,11 @@ }, "jinja2": { "hashes": [ - "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa", - "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90" + "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369", + "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d" ], "markers": "python_version >= '3.7'", - "version": "==3.1.3" + "version": "==3.1.4" }, "json5": { "hashes": [ @@ -584,22 +649,22 @@ }, "jsonpointer": { "hashes": [ - "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a", - "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88" + "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942", + "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'", - "version": "==2.4" + "markers": "python_version >= '3.7'", + "version": "==3.0.0" }, "jsonschema": { "extras": [ "format-nongpl" ], "hashes": [ - "sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f", - "sha256:85727c00279f5fa6bedbe6238d2aa6403bedd8b4864ab11207d07df3cc1b2ee5" + "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4", + "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566" ], "markers": "python_version >= '3.8'", - "version": "==4.21.1" + "version": "==4.23.0" }, "jsonschema-specifications": { "hashes": [ @@ -611,11 +676,11 @@ }, "jupyter-client": { "hashes": [ - "sha256:3b7bd22f058434e3b9a7ea4b1500ed47de2713872288c0d511d19926f99b459f", - "sha256:e842515e2bab8e19186d89fdfea7abd15e39dd581f94e399f00e2af5a1652d3f" + "sha256:2bda14d55ee5ba58552a8c53ae43d215ad9868853489213f37da060ced54d8df", + "sha256:50cbc5c66fd1b8f65ecb66bc490ab73217993632809b6e505687de18e9dea39f" ], "markers": "python_version >= '3.8'", - "version": "==8.6.1" + "version": "==8.6.2" }, "jupyter-core": { "hashes": [ @@ -643,11 +708,11 @@ }, "jupyter-server": { "hashes": [ - "sha256:659154cea512083434fd7c93b7fe0897af7a2fd0b9dd4749282b42eaac4ae677", - "sha256:fb6be52c713e80e004fac34b35a0990d6d36ba06fd0a2b2ed82b899143a64210" + "sha256:47ff506127c2f7851a17bf4713434208fc490955d0e8632e95014a9a9afbeefd", + "sha256:66095021aa9638ced276c248b1d81862e4c50f292d575920bbe960de1c56b12b" ], "markers": "python_version >= '3.8'", - "version": "==2.14.0" + "version": "==2.14.2" }, "jupyter-server-terminals": { "hashes": [ @@ -659,12 +724,12 @@ }, "jupyterlab": { "hashes": [ - "sha256:7935f36ba26eb615183a4f5c2bbca5791b5108ce2a00b5505f8cfd100d53648e", - "sha256:cf3e862bc10dbf4331e4eb37438634f813c238cfc62c71c640b3b3b2caa089a8" + "sha256:343a979fb9582fd08c8511823e320703281cd072a0049bcdafdc7afeda7f2537", + "sha256:807a7ec73637744f879e112060d4b9d9ebe028033b7a429b2d1f4fc523d00245" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==4.1.6" + "version": "==4.2.4" }, "jupyterlab-pygments": { "hashes": [ @@ -676,11 +741,11 
@@ }, "jupyterlab-server": { "hashes": [ - "sha256:54622cbd330526a385ee0c1fdccdff3a1e7219bf3e864a335284a1270a1973df", - "sha256:9b3ba91cf2837f7f124fca36d63f3ca80ace2bed4898a63dd47e6598c1ab006f" + "sha256:e697488f66c3db49df675158a77b3b017520d772c6e1548c7d9bcc5df7944ee4", + "sha256:eb36caca59e74471988f0ae25c77945610b887f777255aa21f8065def9e51ed4" ], "markers": "python_version >= '3.8'", - "version": "==2.26.0" + "version": "==2.27.3" }, "kiwisolver": { "hashes": [ @@ -860,38 +925,39 @@ }, "matplotlib": { "hashes": [ - "sha256:1c13f041a7178f9780fb61cc3a2b10423d5e125480e4be51beaf62b172413b67", - "sha256:232ce322bfd020a434caaffbd9a95333f7c2491e59cfc014041d95e38ab90d1c", - "sha256:493e9f6aa5819156b58fce42b296ea31969f2aab71c5b680b4ea7a3cb5c07d94", - "sha256:50bac6e4d77e4262c4340d7a985c30912054745ec99756ce213bfbc3cb3808eb", - "sha256:606e3b90897554c989b1e38a258c626d46c873523de432b1462f295db13de6f9", - "sha256:6209e5c9aaccc056e63b547a8152661324404dd92340a6e479b3a7f24b42a5d0", - "sha256:6485ac1f2e84676cff22e693eaa4fbed50ef5dc37173ce1f023daef4687df616", - "sha256:6addbd5b488aedb7f9bc19f91cd87ea476206f45d7116fcfe3d31416702a82fa", - "sha256:72f9322712e4562e792b2961971891b9fbbb0e525011e09ea0d1f416c4645661", - "sha256:7a6769f58ce51791b4cb8b4d7642489df347697cd3e23d88266aaaee93b41d9a", - "sha256:8080d5081a86e690d7688ffa542532e87f224c38a6ed71f8fbed34dd1d9fedae", - "sha256:843cbde2f0946dadd8c5c11c6d91847abd18ec76859dc319362a0964493f0ba6", - "sha256:8aac397d5e9ec158960e31c381c5ffc52ddd52bd9a47717e2a694038167dffea", - "sha256:8f65c9f002d281a6e904976007b2d46a1ee2bcea3a68a8c12dda24709ddc9106", - "sha256:90df07db7b599fe7035d2f74ab7e438b656528c68ba6bb59b7dc46af39ee48ef", - "sha256:9bb0189011785ea794ee827b68777db3ca3f93f3e339ea4d920315a0e5a78d54", - "sha256:a0e47eda4eb2614300fc7bb4657fced3e83d6334d03da2173b09e447418d499f", - "sha256:abc9d838f93583650c35eca41cfcec65b2e7cb50fd486da6f0c49b5e1ed23014", - "sha256:ac24233e8f2939ac4fd2919eed1e9c0871eac8057666070e94cbf0b33dd9c338", - "sha256:b12ba985837e4899b762b81f5b2845bd1a28f4fdd1a126d9ace64e9c4eb2fb25", - "sha256:b7a2a253d3b36d90c8993b4620183b55665a429da8357a4f621e78cd48b2b30b", - "sha256:c7064120a59ce6f64103c9cefba8ffe6fba87f2c61d67c401186423c9a20fd35", - "sha256:c89ee9314ef48c72fe92ce55c4e95f2f39d70208f9f1d9db4e64079420d8d732", - "sha256:cc4ccdc64e3039fc303defd119658148f2349239871db72cd74e2eeaa9b80b71", - "sha256:ce1edd9f5383b504dbc26eeea404ed0a00656c526638129028b758fd43fc5f10", - "sha256:ecd79298550cba13a43c340581a3ec9c707bd895a6a061a78fa2524660482fc0", - "sha256:f51c4c869d4b60d769f7b4406eec39596648d9d70246428745a681c327a8ad30", - "sha256:fb44f53af0a62dc80bba4443d9b27f2fde6acfdac281d95bc872dc148a6509cc" + "sha256:0000354e32efcfd86bda75729716b92f5c2edd5b947200be9881f0a671565c33", + "sha256:0c584210c755ae921283d21d01f03a49ef46d1afa184134dd0f95b0202ee6f03", + "sha256:0e835c6988edc3d2d08794f73c323cc62483e13df0194719ecb0723b564e0b5c", + "sha256:0fc001516ffcf1a221beb51198b194d9230199d6842c540108e4ce109ac05cc0", + "sha256:11fed08f34fa682c2b792942f8902e7aefeed400da71f9e5816bea40a7ce28fe", + "sha256:208cbce658b72bf6a8e675058fbbf59f67814057ae78165d8a2f87c45b48d0ff", + "sha256:2315837485ca6188a4b632c5199900e28d33b481eb083663f6a44cfc8987ded3", + "sha256:26040c8f5121cd1ad712abffcd4b5222a8aec3a0fe40bc8542c94331deb8780d", + "sha256:3fda72d4d472e2ccd1be0e9ccb6bf0d2eaf635e7f8f51d737ed7e465ac020cb3", + "sha256:421851f4f57350bcf0811edd754a708d2275533e84f52f6760b740766c6747a7", + "sha256:44a21d922f78ce40435cb35b43dd7d573cf2a30138d5c4b709d19f00e3907fd7", + 
"sha256:4db17fea0ae3aceb8e9ac69c7e3051bae0b3d083bfec932240f9bf5d0197a049", + "sha256:565d572efea2b94f264dd86ef27919515aa6d629252a169b42ce5f570db7f37b", + "sha256:591d3a88903a30a6d23b040c1e44d1afdd0d778758d07110eb7596f811f31842", + "sha256:6d397fd8ccc64af2ec0af1f0efc3bacd745ebfb9d507f3f552e8adb689ed730a", + "sha256:7ccd6270066feb9a9d8e0705aa027f1ff39f354c72a87efe8fa07632f30fc6bb", + "sha256:82cd5acf8f3ef43f7532c2f230249720f5dc5dd40ecafaf1c60ac8200d46d7eb", + "sha256:83c6a792f1465d174c86d06f3ae85a8fe36e6f5964633ae8106312ec0921fdf5", + "sha256:84b3ba8429935a444f1fdc80ed930babbe06725bcf09fbeb5c8757a2cd74af04", + "sha256:a0c977c5c382f6696caf0bd277ef4f936da7e2aa202ff66cad5f0ac1428ee15b", + "sha256:a973c53ad0668c53e0ed76b27d2eeeae8799836fd0d0caaa4ecc66bf4e6676c0", + "sha256:ab38a4f3772523179b2f772103d8030215b318fef6360cb40558f585bf3d017f", + "sha256:b3fce58971b465e01b5c538f9d44915640c20ec5ff31346e963c9e1cd66fa812", + "sha256:b918770bf3e07845408716e5bbda17eadfc3fcbd9307dc67f37d6cf834bb3d98", + "sha256:d12cb1837cffaac087ad6b44399d5e22b78c729de3cdae4629e252067b705e2b", + "sha256:dc23f48ab630474264276be156d0d7710ac6c5a09648ccdf49fef9200d8cbe80", + "sha256:dd2a59ff4b83d33bca3b5ec58203cc65985367812cb8c257f3e101632be86d92", + "sha256:de06b19b8db95dd33d0dc17c926c7c9ebed9f572074b6fac4f65068a6814d010", + "sha256:f1f2e5d29e9435c97ad4c36fb6668e89aee13d48c75893e25cef064675038ac9" ], "index": "pypi", "markers": "python_version >= '3.9'", - "version": "==3.8.4" + "version": "==3.9.1" }, "matplotlib-inline": { "hashes": [ @@ -919,11 +985,11 @@ }, "nbconvert": { "hashes": [ - "sha256:a6733b78ce3d47c3f85e504998495b07e6ea9cf9bf6ec1c98dda63ec6ad19142", - "sha256:ddeff14beeeedf3dd0bc506623e41e4507e551736de59df69a91f86700292b3b" + "sha256:05873c620fe520b6322bf8a5ad562692343fe3452abda5765c7a34b7d1aa3eb3", + "sha256:86ca91ba266b0a448dc96fa6c5b9d98affabde2867b363258703536807f9f7f4" ], "markers": "python_version >= '3.8'", - "version": "==7.16.3" + "version": "==7.16.4" }, "nbformat": { "hashes": [ @@ -951,46 +1017,55 @@ }, "numpy": { "hashes": [ - "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b", - "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818", - "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20", - "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0", - "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010", - "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a", - "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea", - "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c", - "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71", - "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110", - "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be", - "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a", - "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a", - "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5", - "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed", - "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd", - "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c", - "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e", - "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0", - 
"sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c", - "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a", - "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b", - "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0", - "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6", - "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2", - "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a", - "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30", - "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218", - "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5", - "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07", - "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2", - "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4", - "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764", - "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef", - "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3", - "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f" + "sha256:08458fbf403bff5e2b45f08eda195d4b0c9b35682311da5a5a0a0925b11b9bd8", + "sha256:0fbb536eac80e27a2793ffd787895242b7f18ef792563d742c2d673bfcb75134", + "sha256:12f5d865d60fb9734e60a60f1d5afa6d962d8d4467c120a1c0cda6eb2964437d", + "sha256:15eb4eca47d36ec3f78cde0a3a2ee24cf05ca7396ef808dda2c0ddad7c2bde67", + "sha256:173a00b9995f73b79eb0191129f2455f1e34c203f559dd118636858cc452a1bf", + "sha256:1b902ce0e0a5bb7704556a217c4f63a7974f8f43e090aff03fcf262e0b135e02", + "sha256:1f682ea61a88479d9498bf2091fdcd722b090724b08b31d63e022adc063bad59", + "sha256:1f87fec1f9bc1efd23f4227becff04bd0e979e23ca50cc92ec88b38489db3b55", + "sha256:24a0e1befbfa14615b49ba9659d3d8818a0f4d8a1c5822af8696706fbda7310c", + "sha256:2c3a346ae20cfd80b6cfd3e60dc179963ef2ea58da5ec074fd3d9e7a1e7ba97f", + "sha256:36d3a9405fd7c511804dc56fc32974fa5533bdeb3cd1604d6b8ff1d292b819c4", + "sha256:3fdabe3e2a52bc4eff8dc7a5044342f8bd9f11ef0934fcd3289a788c0eb10018", + "sha256:4127d4303b9ac9f94ca0441138acead39928938660ca58329fe156f84b9f3015", + "sha256:4658c398d65d1b25e1760de3157011a80375da861709abd7cef3bad65d6543f9", + "sha256:485b87235796410c3519a699cfe1faab097e509e90ebb05dcd098db2ae87e7b3", + "sha256:529af13c5f4b7a932fb0e1911d3a75da204eff023ee5e0e79c1751564221a5c8", + "sha256:5a3d94942c331dd4e0e1147f7a8699a4aa47dffc11bf8a1523c12af8b2e91bbe", + "sha256:5daab361be6ddeb299a918a7c0864fa8618af66019138263247af405018b04e1", + "sha256:61728fba1e464f789b11deb78a57805c70b2ed02343560456190d0501ba37b0f", + "sha256:6790654cb13eab303d8402354fabd47472b24635700f631f041bd0b65e37298a", + "sha256:69ff563d43c69b1baba77af455dd0a839df8d25e8590e79c90fcbe1499ebde42", + "sha256:6bf4e6f4a2a2e26655717a1983ef6324f2664d7011f6ef7482e8c0b3d51e82ac", + "sha256:6e4eeb6eb2fced786e32e6d8df9e755ce5be920d17f7ce00bc38fcde8ccdbf9e", + "sha256:72dc22e9ec8f6eaa206deb1b1355eb2e253899d7347f5e2fae5f0af613741d06", + "sha256:75b4e316c5902d8163ef9d423b1c3f2f6252226d1aa5cd8a0a03a7d01ffc6268", + "sha256:7b9853803278db3bdcc6cd5beca37815b133e9e77ff3d4733c247414e78eb8d1", + "sha256:7d6fddc5fe258d3328cd8e3d7d3e02234c5d70e01ebe377a6ab92adb14039cb4", + "sha256:81b0893a39bc5b865b8bf89e9ad7807e16717f19868e9d234bdaf9b1f1393868", + "sha256:8efc84f01c1cd7e34b3fb310183e72fcdf55293ee736d679b6d35b35d80bba26", + 
"sha256:8fae4ebbf95a179c1156fab0b142b74e4ba4204c87bde8d3d8b6f9c34c5825ef", + "sha256:99d0d92a5e3613c33a5f01db206a33f8fdf3d71f2912b0de1739894668b7a93b", + "sha256:9adbd9bb520c866e1bfd7e10e1880a1f7749f1f6e5017686a5fbb9b72cf69f82", + "sha256:a1e01dcaab205fbece13c1410253a9eea1b1c9b61d237b6fa59bcc46e8e89343", + "sha256:a8fc2de81ad835d999113ddf87d1ea2b0f4704cbd947c948d2f5513deafe5a7b", + "sha256:b83e16a5511d1b1f8a88cbabb1a6f6a499f82c062a4251892d9ad5d609863fb7", + "sha256:bb2124fdc6e62baae159ebcfa368708867eb56806804d005860b6007388df171", + "sha256:bfc085b28d62ff4009364e7ca34b80a9a080cbd97c2c0630bb5f7f770dae9414", + "sha256:cbab9fc9c391700e3e1287666dfd82d8666d10e69a6c4a09ab97574c0b7ee0a7", + "sha256:e5eeca8067ad04bc8a2a8731183d51d7cbaac66d86085d5f4766ee6bf19c7f87", + "sha256:e9e81fa9017eaa416c056e5d9e71be93d05e2c3c2ab308d23307a8bc4443c368", + "sha256:ea2326a4dca88e4a274ba3a4405eb6c6467d3ffbd8c7d38632502eaae3820587", + "sha256:eacf3291e263d5a67d8c1a581a8ebbcfd6447204ef58828caf69a5e3e8c75990", + "sha256:ec87f5f8aca726117a1c9b7083e7656a9d0d606eec7299cc067bb83d26f16e0c", + "sha256:f1659887361a7151f89e79b276ed8dff3d75877df906328f14d8bb40bb4f5101", + "sha256:f9cf5ea551aec449206954b075db819f52adc1638d46a6738253a712d553c7b4" ], "index": "pypi", "markers": "python_version >= '3.9'", - "version": "==1.26.4" + "version": "==2.0.1" }, "oauthlib": { "hashes": [ @@ -1010,11 +1085,11 @@ }, "packaging": { "hashes": [ - "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5", - "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9" + "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002", + "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124" ], - "markers": "python_version >= '3.7'", - "version": "==24.0" + "markers": "python_version >= '3.8'", + "version": "==24.1" }, "pandas": { "hashes": [ @@ -1078,95 +1153,106 @@ }, "pillow": { "hashes": [ - "sha256:048ad577748b9fa4a99a0548c64f2cb8d672d5bf2e643a739ac8faff1164238c", - "sha256:048eeade4c33fdf7e08da40ef402e748df113fd0b4584e32c4af74fe78baaeb2", - "sha256:0ba26351b137ca4e0db0342d5d00d2e355eb29372c05afd544ebf47c0956ffeb", - "sha256:0ea2a783a2bdf2a561808fe4a7a12e9aa3799b701ba305de596bc48b8bdfce9d", - "sha256:1530e8f3a4b965eb6a7785cf17a426c779333eb62c9a7d1bbcf3ffd5bf77a4aa", - "sha256:16563993329b79513f59142a6b02055e10514c1a8e86dca8b48a893e33cf91e3", - "sha256:19aeb96d43902f0a783946a0a87dbdad5c84c936025b8419da0a0cd7724356b1", - "sha256:1a1d1915db1a4fdb2754b9de292642a39a7fb28f1736699527bb649484fb966a", - "sha256:1b87bd9d81d179bd8ab871603bd80d8645729939f90b71e62914e816a76fc6bd", - "sha256:1dfc94946bc60ea375cc39cff0b8da6c7e5f8fcdc1d946beb8da5c216156ddd8", - "sha256:2034f6759a722da3a3dbd91a81148cf884e91d1b747992ca288ab88c1de15999", - "sha256:261ddb7ca91fcf71757979534fb4c128448b5b4c55cb6152d280312062f69599", - "sha256:2ed854e716a89b1afcedea551cd85f2eb2a807613752ab997b9974aaa0d56936", - "sha256:3102045a10945173d38336f6e71a8dc71bcaeed55c3123ad4af82c52807b9375", - "sha256:339894035d0ede518b16073bdc2feef4c991ee991a29774b33e515f1d308e08d", - "sha256:412444afb8c4c7a6cc11a47dade32982439925537e483be7c0ae0cf96c4f6a0b", - "sha256:4203efca580f0dd6f882ca211f923168548f7ba334c189e9eab1178ab840bf60", - "sha256:45ebc7b45406febf07fef35d856f0293a92e7417ae7933207e90bf9090b70572", - "sha256:4b5ec25d8b17217d635f8935dbc1b9aa5907962fae29dff220f2659487891cd3", - "sha256:4c8e73e99da7db1b4cad7f8d682cf6abad7844da39834c288fbfa394a47bbced", - "sha256:4e6f7d1c414191c1199f8996d3f2282b9ebea0945693fb67392c75a3a320941f", - 
"sha256:4eaa22f0d22b1a7e93ff0a596d57fdede2e550aecffb5a1ef1106aaece48e96b", - "sha256:50b8eae8f7334ec826d6eeffaeeb00e36b5e24aa0b9df322c247539714c6df19", - "sha256:50fd3f6b26e3441ae07b7c979309638b72abc1a25da31a81a7fbd9495713ef4f", - "sha256:51243f1ed5161b9945011a7360e997729776f6e5d7005ba0c6879267d4c5139d", - "sha256:5d512aafa1d32efa014fa041d38868fda85028e3f930a96f85d49c7d8ddc0383", - "sha256:5f77cf66e96ae734717d341c145c5949c63180842a545c47a0ce7ae52ca83795", - "sha256:6b02471b72526ab8a18c39cb7967b72d194ec53c1fd0a70b050565a0f366d355", - "sha256:6fb1b30043271ec92dc65f6d9f0b7a830c210b8a96423074b15c7bc999975f57", - "sha256:7161ec49ef0800947dc5570f86568a7bb36fa97dd09e9827dc02b718c5643f09", - "sha256:72d622d262e463dfb7595202d229f5f3ab4b852289a1cd09650362db23b9eb0b", - "sha256:74d28c17412d9caa1066f7a31df8403ec23d5268ba46cd0ad2c50fb82ae40462", - "sha256:78618cdbccaa74d3f88d0ad6cb8ac3007f1a6fa5c6f19af64b55ca170bfa1edf", - "sha256:793b4e24db2e8742ca6423d3fde8396db336698c55cd34b660663ee9e45ed37f", - "sha256:798232c92e7665fe82ac085f9d8e8ca98826f8e27859d9a96b41d519ecd2e49a", - "sha256:81d09caa7b27ef4e61cb7d8fbf1714f5aec1c6b6c5270ee53504981e6e9121ad", - "sha256:8ab74c06ffdab957d7670c2a5a6e1a70181cd10b727cd788c4dd9005b6a8acd9", - "sha256:8eb0908e954d093b02a543dc963984d6e99ad2b5e36503d8a0aaf040505f747d", - "sha256:90b9e29824800e90c84e4022dd5cc16eb2d9605ee13f05d47641eb183cd73d45", - "sha256:9797a6c8fe16f25749b371c02e2ade0efb51155e767a971c61734b1bf6293994", - "sha256:9d2455fbf44c914840c793e89aa82d0e1763a14253a000743719ae5946814b2d", - "sha256:9d3bea1c75f8c53ee4d505c3e67d8c158ad4df0d83170605b50b64025917f338", - "sha256:9e2ec1e921fd07c7cda7962bad283acc2f2a9ccc1b971ee4b216b75fad6f0463", - "sha256:9e91179a242bbc99be65e139e30690e081fe6cb91a8e77faf4c409653de39451", - "sha256:a0eaa93d054751ee9964afa21c06247779b90440ca41d184aeb5d410f20ff591", - "sha256:a2c405445c79c3f5a124573a051062300936b0281fee57637e706453e452746c", - "sha256:aa7e402ce11f0885305bfb6afb3434b3cd8f53b563ac065452d9d5654c7b86fd", - "sha256:aff76a55a8aa8364d25400a210a65ff59d0168e0b4285ba6bf2bd83cf675ba32", - "sha256:b09b86b27a064c9624d0a6c54da01c1beaf5b6cadfa609cf63789b1d08a797b9", - "sha256:b14f16f94cbc61215115b9b1236f9c18403c15dd3c52cf629072afa9d54c1cbf", - "sha256:b50811d664d392f02f7761621303eba9d1b056fb1868c8cdf4231279645c25f5", - "sha256:b7bc2176354defba3edc2b9a777744462da2f8e921fbaf61e52acb95bafa9828", - "sha256:c78e1b00a87ce43bb37642c0812315b411e856a905d58d597750eb79802aaaa3", - "sha256:c83341b89884e2b2e55886e8fbbf37c3fa5efd6c8907124aeb72f285ae5696e5", - "sha256:ca2870d5d10d8726a27396d3ca4cf7976cec0f3cb706debe88e3a5bd4610f7d2", - "sha256:ccce24b7ad89adb5a1e34a6ba96ac2530046763912806ad4c247356a8f33a67b", - "sha256:cd5e14fbf22a87321b24c88669aad3a51ec052eb145315b3da3b7e3cc105b9a2", - "sha256:ce49c67f4ea0609933d01c0731b34b8695a7a748d6c8d186f95e7d085d2fe475", - "sha256:d33891be6df59d93df4d846640f0e46f1a807339f09e79a8040bc887bdcd7ed3", - "sha256:d3b2348a78bc939b4fed6552abfd2e7988e0f81443ef3911a4b8498ca084f6eb", - "sha256:d886f5d353333b4771d21267c7ecc75b710f1a73d72d03ca06df49b09015a9ef", - "sha256:d93480005693d247f8346bc8ee28c72a2191bdf1f6b5db469c096c0c867ac015", - "sha256:dc1a390a82755a8c26c9964d457d4c9cbec5405896cba94cf51f36ea0d855002", - "sha256:dd78700f5788ae180b5ee8902c6aea5a5726bac7c364b202b4b3e3ba2d293170", - "sha256:e46f38133e5a060d46bd630faa4d9fa0202377495df1f068a8299fd78c84de84", - "sha256:e4b878386c4bf293578b48fc570b84ecfe477d3b77ba39a6e87150af77f40c57", - "sha256:f0d0591a0aeaefdaf9a5e545e7485f89910c977087e7de2b6c388aec32011e9f", - 
"sha256:fdcbb4068117dfd9ce0138d068ac512843c52295ed996ae6dd1faf537b6dbc27", - "sha256:ff61bfd9253c3915e6d41c651d5f962da23eda633cf02262990094a18a55371a" - ], - "markers": "python_version >= '3.8'", - "version": "==10.3.0" + "sha256:02a2be69f9c9b8c1e97cf2713e789d4e398c751ecfd9967c18d0ce304efbf885", + "sha256:030abdbe43ee02e0de642aee345efa443740aa4d828bfe8e2eb11922ea6a21ea", + "sha256:06b2f7898047ae93fad74467ec3d28fe84f7831370e3c258afa533f81ef7f3df", + "sha256:0755ffd4a0c6f267cccbae2e9903d95477ca2f77c4fcf3a3a09570001856c8a5", + "sha256:0a9ec697746f268507404647e531e92889890a087e03681a3606d9b920fbee3c", + "sha256:0ae24a547e8b711ccaaf99c9ae3cd975470e1a30caa80a6aaee9a2f19c05701d", + "sha256:134ace6dc392116566980ee7436477d844520a26a4b1bd4053f6f47d096997fd", + "sha256:166c1cd4d24309b30d61f79f4a9114b7b2313d7450912277855ff5dfd7cd4a06", + "sha256:1b5dea9831a90e9d0721ec417a80d4cbd7022093ac38a568db2dd78363b00908", + "sha256:1d846aea995ad352d4bdcc847535bd56e0fd88d36829d2c90be880ef1ee4668a", + "sha256:1ef61f5dd14c300786318482456481463b9d6b91ebe5ef12f405afbba77ed0be", + "sha256:297e388da6e248c98bc4a02e018966af0c5f92dfacf5a5ca22fa01cb3179bca0", + "sha256:298478fe4f77a4408895605f3482b6cc6222c018b2ce565c2b6b9c354ac3229b", + "sha256:29dbdc4207642ea6aad70fbde1a9338753d33fb23ed6956e706936706f52dd80", + "sha256:2db98790afc70118bd0255c2eeb465e9767ecf1f3c25f9a1abb8ffc8cfd1fe0a", + "sha256:32cda9e3d601a52baccb2856b8ea1fc213c90b340c542dcef77140dfa3278a9e", + "sha256:37fb69d905be665f68f28a8bba3c6d3223c8efe1edf14cc4cfa06c241f8c81d9", + "sha256:416d3a5d0e8cfe4f27f574362435bc9bae57f679a7158e0096ad2beb427b8696", + "sha256:43efea75eb06b95d1631cb784aa40156177bf9dd5b4b03ff38979e048258bc6b", + "sha256:4b35b21b819ac1dbd1233317adeecd63495f6babf21b7b2512d244ff6c6ce309", + "sha256:4d9667937cfa347525b319ae34375c37b9ee6b525440f3ef48542fcf66f2731e", + "sha256:5161eef006d335e46895297f642341111945e2c1c899eb406882a6c61a4357ab", + "sha256:543f3dc61c18dafb755773efc89aae60d06b6596a63914107f75459cf984164d", + "sha256:551d3fd6e9dc15e4c1eb6fc4ba2b39c0c7933fa113b220057a34f4bb3268a060", + "sha256:59291fb29317122398786c2d44427bbd1a6d7ff54017075b22be9d21aa59bd8d", + "sha256:5b001114dd152cfd6b23befeb28d7aee43553e2402c9f159807bf55f33af8a8d", + "sha256:5b4815f2e65b30f5fbae9dfffa8636d992d49705723fe86a3661806e069352d4", + "sha256:5dc6761a6efc781e6a1544206f22c80c3af4c8cf461206d46a1e6006e4429ff3", + "sha256:5e84b6cc6a4a3d76c153a6b19270b3526a5a8ed6b09501d3af891daa2a9de7d6", + "sha256:6209bb41dc692ddfee4942517c19ee81b86c864b626dbfca272ec0f7cff5d9fb", + "sha256:673655af3eadf4df6b5457033f086e90299fdd7a47983a13827acf7459c15d94", + "sha256:6c762a5b0997f5659a5ef2266abc1d8851ad7749ad9a6a5506eb23d314e4f46b", + "sha256:7086cc1d5eebb91ad24ded9f58bec6c688e9f0ed7eb3dbbf1e4800280a896496", + "sha256:73664fe514b34c8f02452ffb73b7a92c6774e39a647087f83d67f010eb9a0cf0", + "sha256:76a911dfe51a36041f2e756b00f96ed84677cdeb75d25c767f296c1c1eda1319", + "sha256:780c072c2e11c9b2c7ca37f9a2ee8ba66f44367ac3e5c7832afcfe5104fd6d1b", + "sha256:7928ecbf1ece13956b95d9cbcfc77137652b02763ba384d9ab508099a2eca856", + "sha256:7970285ab628a3779aecc35823296a7869f889b8329c16ad5a71e4901a3dc4ef", + "sha256:7a8d4bade9952ea9a77d0c3e49cbd8b2890a399422258a77f357b9cc9be8d680", + "sha256:7c1ee6f42250df403c5f103cbd2768a28fe1a0ea1f0f03fe151c8741e1469c8b", + "sha256:7dfecdbad5c301d7b5bde160150b4db4c659cee2b69589705b6f8a0c509d9f42", + "sha256:812f7342b0eee081eaec84d91423d1b4650bb9828eb53d8511bcef8ce5aecf1e", + "sha256:866b6942a92f56300012f5fbac71f2d610312ee65e22f1aa2609e491284e5597", + 
"sha256:86dcb5a1eb778d8b25659d5e4341269e8590ad6b4e8b44d9f4b07f8d136c414a", + "sha256:87dd88ded2e6d74d31e1e0a99a726a6765cda32d00ba72dc37f0651f306daaa8", + "sha256:8bc1a764ed8c957a2e9cacf97c8b2b053b70307cf2996aafd70e91a082e70df3", + "sha256:8d4d5063501b6dd4024b8ac2f04962d661222d120381272deea52e3fc52d3736", + "sha256:8f0aef4ef59694b12cadee839e2ba6afeab89c0f39a3adc02ed51d109117b8da", + "sha256:930044bb7679ab003b14023138b50181899da3f25de50e9dbee23b61b4de2126", + "sha256:950be4d8ba92aca4b2bb0741285a46bfae3ca699ef913ec8416c1b78eadd64cd", + "sha256:961a7293b2457b405967af9c77dcaa43cc1a8cd50d23c532e62d48ab6cdd56f5", + "sha256:9b885f89040bb8c4a1573566bbb2f44f5c505ef6e74cec7ab9068c900047f04b", + "sha256:9f4727572e2918acaa9077c919cbbeb73bd2b3ebcfe033b72f858fc9fbef0026", + "sha256:a02364621fe369e06200d4a16558e056fe2805d3468350df3aef21e00d26214b", + "sha256:a985e028fc183bf12a77a8bbf36318db4238a3ded7fa9df1b9a133f1cb79f8fc", + "sha256:ac1452d2fbe4978c2eec89fb5a23b8387aba707ac72810d9490118817d9c0b46", + "sha256:b15e02e9bb4c21e39876698abf233c8c579127986f8207200bc8a8f6bb27acf2", + "sha256:b2724fdb354a868ddf9a880cb84d102da914e99119211ef7ecbdc613b8c96b3c", + "sha256:bbc527b519bd3aa9d7f429d152fea69f9ad37c95f0b02aebddff592688998abe", + "sha256:bcd5e41a859bf2e84fdc42f4edb7d9aba0a13d29a2abadccafad99de3feff984", + "sha256:bd2880a07482090a3bcb01f4265f1936a903d70bc740bfcb1fd4e8a2ffe5cf5a", + "sha256:bee197b30783295d2eb680b311af15a20a8b24024a19c3a26431ff83eb8d1f70", + "sha256:bf2342ac639c4cf38799a44950bbc2dfcb685f052b9e262f446482afaf4bffca", + "sha256:c76e5786951e72ed3686e122d14c5d7012f16c8303a674d18cdcd6d89557fc5b", + "sha256:cbed61494057c0f83b83eb3a310f0bf774b09513307c434d4366ed64f4128a91", + "sha256:cfdd747216947628af7b259d274771d84db2268ca062dd5faf373639d00113a3", + "sha256:d7480af14364494365e89d6fddc510a13e5a2c3584cb19ef65415ca57252fb84", + "sha256:dbc6ae66518ab3c5847659e9988c3b60dc94ffb48ef9168656e0019a93dbf8a1", + "sha256:dc3e2db6ba09ffd7d02ae9141cfa0ae23393ee7687248d46a7507b75d610f4f5", + "sha256:dfe91cb65544a1321e631e696759491ae04a2ea11d36715eca01ce07284738be", + "sha256:e4d49b85c4348ea0b31ea63bc75a9f3857869174e2bf17e7aba02945cd218e6f", + "sha256:e4db64794ccdf6cb83a59d73405f63adbe2a1887012e308828596100a0b2f6cc", + "sha256:e553cad5179a66ba15bb18b353a19020e73a7921296a7979c4a2b7f6a5cd57f9", + "sha256:e88d5e6ad0d026fba7bdab8c3f225a69f063f116462c49892b0149e21b6c0a0e", + "sha256:ecd85a8d3e79cd7158dec1c9e5808e821feea088e2f69a974db5edf84dc53141", + "sha256:f5b92f4d70791b4a67157321c4e8225d60b119c5cc9aee8ecf153aace4aad4ef", + "sha256:f5f0c3e969c8f12dd2bb7e0b15d5c468b51e5017e01e2e867335c81903046a22", + "sha256:f7baece4ce06bade126fb84b8af1c33439a76d8a6fd818970215e0560ca28c27", + "sha256:ff25afb18123cea58a591ea0244b92eb1e61a1fd497bf6d6384f09bc3262ec3e", + "sha256:ff337c552345e95702c5fde3158acb0625111017d0e5f24bf3acdb9cc16b90d1" + ], + "markers": "python_version >= '3.8'", + "version": "==10.4.0" }, "platformdirs": { "hashes": [ - "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068", - "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768" + "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee", + "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3" ], "markers": "python_version >= '3.8'", - "version": "==4.2.0" + "version": "==4.2.2" }, "plotly": { "hashes": [ - "sha256:69243f8c165d4be26c0df1c6f0b7b258e2dfeefe032763404ad7e7fb7d7c2073", - "sha256:a33f41fd5922e45b2b253f795b200d14452eb625790bb72d0a72cf1328a6abbf" + 
"sha256:76cbe78f75eddc10c56f5a4ee3e7ccaade7c0a57465546f02098c0caed6c2d1a", + "sha256:89e57d003a116303a34de6700862391367dd564222ab71f8531df70279fc0193" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==5.21.0" + "version": "==5.23.0" }, "prometheus-client": { "hashes": [ @@ -1178,33 +1264,59 @@ }, "prompt-toolkit": { "hashes": [ - "sha256:3527b7af26106cbc65a040bcc84839a3566ec1b051bb0bfe953631e704b0ff7d", - "sha256:a11a29cb3bf0a28a387fe5122cdb649816a957cd9261dcedf8c9f1fef33eacf6" + "sha256:0d7bfa67001d5e39d02c224b663abc33687405033a8c422d0d675a5a13361d10", + "sha256:1e1b29cb58080b1e69f207c893a1a7bf16d127a5c30c9d17a25a5d77792e5360" ], "markers": "python_full_version >= '3.7.0'", - "version": "==3.0.43" + "version": "==3.0.47" + }, + "proto-plus": { + "hashes": [ + "sha256:30b72a5ecafe4406b0d339db35b56c4059064e69227b8c3bda7462397f966445", + "sha256:402576830425e5f6ce4c2a6702400ac79897dab0b4343821aa5188b0fab81a12" + ], + "markers": "python_version >= '3.7'", + "version": "==1.24.0" + }, + "protobuf": { + "hashes": [ + "sha256:0e341109c609749d501986b835f667c6e1e24531096cff9d34ae411595e26505", + "sha256:176c12b1f1c880bf7a76d9f7c75822b6a2bc3db2d28baa4d300e8ce4cde7409b", + "sha256:354d84fac2b0d76062e9b3221f4abbbacdfd2a4d8af36bab0474f3a0bb30ab38", + "sha256:4fadd8d83e1992eed0248bc50a4a6361dc31bcccc84388c54c86e530b7f58863", + "sha256:54330f07e4949d09614707c48b06d1a22f8ffb5763c159efd5c0928326a91470", + "sha256:610e700f02469c4a997e58e328cac6f305f649826853813177e6290416e846c6", + "sha256:7fc3add9e6003e026da5fc9e59b131b8f22b428b991ccd53e2af8071687b4fce", + "sha256:9e8f199bf7f97bd7ecebffcae45ebf9527603549b2b562df0fbc6d4d688f14ca", + "sha256:a109916aaac42bff84702fb5187f3edadbc7c97fc2c99c5ff81dd15dcce0d1e5", + "sha256:b848dbe1d57ed7c191dfc4ea64b8b004a3f9ece4bf4d0d80a367b76df20bf36e", + "sha256:f3ecdef226b9af856075f28227ff2c90ce3a594d092c39bee5513573f25e2714" + ], + "markers": "python_version >= '3.8'", + "version": "==5.27.2" }, "psutil": { "hashes": [ - "sha256:02615ed8c5ea222323408ceba16c60e99c3f91639b07da6373fb7e6539abc56d", - "sha256:05806de88103b25903dff19bb6692bd2e714ccf9e668d050d144012055cbca73", - "sha256:26bd09967ae00920df88e0352a91cff1a78f8d69b3ecabbfe733610c0af486c8", - "sha256:27cc40c3493bb10de1be4b3f07cae4c010ce715290a5be22b98493509c6299e2", - "sha256:36f435891adb138ed3c9e58c6af3e2e6ca9ac2f365efe1f9cfef2794e6c93b4e", - "sha256:50187900d73c1381ba1454cf40308c2bf6f34268518b3f36a9b663ca87e65e36", - "sha256:611052c4bc70432ec770d5d54f64206aa7203a101ec273a0cd82418c86503bb7", - "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c", - "sha256:7d79560ad97af658a0f6adfef8b834b53f64746d45b403f225b85c5c2c140eee", - "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421", - "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf", - "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81", - "sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0", - "sha256:bd1184ceb3f87651a67b2708d4c3338e9b10c5df903f2e3776b62303b26cb631", - "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4", - "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8" + "sha256:02b69001f44cc73c1c5279d02b30a817e339ceb258ad75997325e0e6169d8b35", + "sha256:1287c2b95f1c0a364d23bc6f2ea2365a8d4d9b726a3be7294296ff7ba97c17f0", + "sha256:1e7c870afcb7d91fdea2b37c24aeb08f98b6d67257a5cb0a8bc3ac68d0f1a68c", + "sha256:21f1fb635deccd510f69f485b87433460a603919b45e2a324ad65b0cc74f8fb1", + 
"sha256:33ea5e1c975250a720b3a6609c490db40dae5d83a4eb315170c4fe0d8b1f34b3", + "sha256:34859b8d8f423b86e4385ff3665d3f4d94be3cdf48221fbe476e883514fdb71c", + "sha256:5fd9a97c8e94059b0ef54a7d4baf13b405011176c3b6ff257c247cae0d560ecd", + "sha256:6ec7588fb3ddaec7344a825afe298db83fe01bfaaab39155fa84cf1c0d6b13c3", + "sha256:6ed2440ada7ef7d0d608f20ad89a04ec47d2d3ab7190896cd62ca5fc4fe08bf0", + "sha256:8faae4f310b6d969fa26ca0545338b21f73c6b15db7c4a8d934a5482faa818f2", + "sha256:a021da3e881cd935e64a3d0a20983bda0bb4cf80e4f74fa9bfcb1bc5785360c6", + "sha256:a495580d6bae27291324fe60cea0b5a7c23fa36a7cd35035a16d93bdcf076b9d", + "sha256:a9a3dbfb4de4f18174528d87cc352d1f788b7496991cca33c6996f40c9e3c92c", + "sha256:c588a7e9b1173b6e866756dde596fd4cad94f9399daf99ad8c3258b3cb2b47a0", + "sha256:e2e8d0054fc88153ca0544f5c4d554d42e33df2e009c4ff42284ac9ebdef4132", + "sha256:fc8c9510cde0146432bbdb433322861ee8c3efbf8589865c8bf8d21cb30c4d14", + "sha256:ffe7fc9b6b36beadc8c322f84e1caff51e8703b88eee1da46d1e3a6ae11b4fd0" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", - "version": "==5.9.8" + "version": "==6.0.0" }, "ptyprocess": { "hashes": [ @@ -1216,53 +1328,69 @@ }, "pure-eval": { "hashes": [ - "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350", - "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3" + "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", + "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42" ], - "version": "==0.2.2" + "version": "==0.2.3" }, "pyarrow": { "hashes": [ - "sha256:033b7cad32198754d93465dcfb71d0ba7cb7cd5c9afd7052cab7214676eec38b", - "sha256:06c2bb2a98bc792f040bef31ad3e9be6a63d0cb39189227c08a7d955db96816e", - "sha256:23c6753ed4f6adb8461e7c383e418391b8d8453c5d67e17f416c3a5d5709afbd", - "sha256:248723e4ed3255fcd73edcecc209744d58a9ca852e4cf3d2577811b6d4b59818", - "sha256:25335e6f1f07fdaa026a61c758ee7d19ce824a866b27bba744348fa73bb5a440", - "sha256:28f3016958a8e45a1069303a4a4f6a7d4910643fc08adb1e2e4a7ff056272ad3", - "sha256:290e36a59a0993e9a5224ed2fb3e53375770f07379a0ea03ee2fce2e6d30b423", - "sha256:29850d050379d6e8b5a693098f4de7fd6a2bea4365bfd073d7c57c57b95041ee", - "sha256:2d4f905209de70c0eb5b2de6763104d5a9a37430f137678edfb9a675bac9cd98", - "sha256:3a4f240852b302a7af4646c8bfe9950c4691a419847001178662a98915fd7ee7", - "sha256:3e6d459c0c22f0b9c810a3917a1de3ee704b021a5fb8b3bacf968eece6df098f", - "sha256:3ff3bdfe6f1b81ca5b73b70a8d482d37a766433823e0c21e22d1d7dde76ca33f", - "sha256:4e7d9cfb5a1e648e172428c7a42b744610956f3b70f524aa3a6c02a448ba853e", - "sha256:58922e4bfece8b02abf7159f1f53a8f4d9f8e08f2d988109126c17c3bb261f22", - "sha256:5f8bc839ea36b1f99984c78e06e7a06054693dc2af8920f6fb416b5bca9944e4", - "sha256:6669799a1d4ca9da9c7e06ef48368320f5856f36f9a4dd31a11839dda3f6cc8c", - "sha256:7167107d7fb6dcadb375b4b691b7e316f4368f39f6f45405a05535d7ad5e5058", - "sha256:88b340f0a1d05b5ccc3d2d986279045655b1fe8e41aba6ca44ea28da0d1455d8", - "sha256:89722cb64286ab3d4daf168386f6968c126057b8c7ec3ef96302e81d8cdb8ae4", - "sha256:8bd2baa5fe531571847983f36a30ddbf65261ef23e496862ece83bdceb70420d", - "sha256:8c1faf2482fb89766e79745670cbca04e7018497d85be9242d5350cba21357e1", - "sha256:90adb99e8ce5f36fbecbbc422e7dcbcbed07d985eed6062e459e23f9e71fd197", - "sha256:90f19e976d9c3d8e73c80be84ddbe2f830b6304e4c576349d9360e335cd627fc", - "sha256:9c9bc803cb3b7bfacc1e96ffbfd923601065d9d3f911179d81e72d99fd74a3d9", - "sha256:a22366249bf5fd40ddacc4f03cd3160f2d7c247692945afb1899bab8a140ddfb", - 
"sha256:ad2459bf1f22b6a5cdcc27ebfd99307d5526b62d217b984b9f5c974651398832", - "sha256:adccc81d3dc0478ea0b498807b39a8d41628fa9210729b2f718b78cb997c7c91", - "sha256:b116e7fd7889294cbd24eb90cd9bdd3850be3738d61297855a71ac3b8124ee38", - "sha256:c2a335198f886b07e4b5ea16d08ee06557e07db54a8400cc0d03c7f6a22f785f", - "sha256:cd0ba387705044b3ac77b1b317165c0498299b08261d8122c96051024f953cd5", - "sha256:e85241b44cc3d365ef950432a1b3bd44ac54626f37b2e3a0cc89c20e45dfd8bf", - "sha256:eaa8f96cecf32da508e6c7f69bb8401f03745c050c1dd42ec2596f2e98deecac", - "sha256:f3d77463dee7e9f284ef42d341689b459a63ff2e75cee2b9302058d0d98fe142", - "sha256:f5e81dfb4e519baa6b4c80410421528c214427e77ca0ea9461eb4097c328fa33", - "sha256:f639c059035011db8c0497e541a8a45d98a58dbe34dc8fadd0ef128f2cee46e5", - "sha256:f7a197f3670606a960ddc12adbe8075cea5f707ad7bf0dffa09637fdbb89f76c" + "sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a", + "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca", + "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597", + "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c", + "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb", + "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977", + "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3", + "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687", + "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7", + "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204", + "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28", + "sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087", + "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15", + "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc", + "sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2", + "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155", + "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df", + "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22", + "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a", + "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b", + "sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03", + "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda", + "sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07", + "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204", + "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b", + "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c", + "sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545", + "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655", + "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420", + "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5", + "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4", + "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8", + "sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053", + "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145", + "sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047", + 
"sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==15.0.2" + "version": "==17.0.0" + }, + "pyasn1": { + "hashes": [ + "sha256:3a35ab2c4b5ef98e17dfdec8ab074046fbda76e281c5a706ccd82328cfc8f64c", + "sha256:cca4bb0f2df5504f02f6f8a775b6e416ff9b0b3b16f7ee80b5a3153d9b804473" + ], + "markers": "python_version >= '3.8'", + "version": "==0.6.0" + }, + "pyasn1-modules": { + "hashes": [ + "sha256:831dbcea1b177b28c9baddf4c6d1013c24c3accd14a1873fffaa6a2e905f17b6", + "sha256:be04f15b66c206eed667e0bb5ab27e2b1855ea54a842e5037738099e8ca4ae0b" + ], + "markers": "python_version >= '3.8'", + "version": "==0.4.0" }, "pycparser": { "hashes": [ @@ -1274,11 +1402,11 @@ }, "pygments": { "hashes": [ - "sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c", - "sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367" + "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199", + "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a" ], - "markers": "python_version >= '3.7'", - "version": "==2.17.2" + "markers": "python_version >= '3.8'", + "version": "==2.18.0" }, "pyparsing": { "hashes": [ @@ -1379,114 +1507,114 @@ }, "pyzmq": { "hashes": [ - "sha256:0a6ceaddc830dd3ca86cb8451cf373d1f05215368e11834538c2902ed5205139", - "sha256:0c0fed8aa9ba0488ee1cbdaa304deea92d52fab43d373297002cfcc69c0a20c5", - "sha256:0fa1416876194927f7723d6b7171b95e1115602967fc6bfccbc0d2d51d8ebae1", - "sha256:166c5e41045939a52c01e6f374e493d9a6a45dfe677360d3e7026e38c42e8906", - "sha256:18dfffe23751edee917764ffa133d5d3fef28dfd1cf3adebef8c90bc854c74c4", - "sha256:1a60a03b01e8c9c58932ec0cca15b1712d911c2800eb82d4281bc1ae5b6dad50", - "sha256:1c3d3c92fa54eda94ab369ca5b8d35059987c326ba5e55326eb068862f64b1fc", - "sha256:1c60fcdfa3229aeee4291c5d60faed3a813b18bdadb86299c4bf49e8e51e8605", - "sha256:1dd40d586ad6f53764104df6e01810fe1b4e88fd353774629a5e6fe253813f79", - "sha256:264ee0e72b72ca59279dc320deab5ae0fac0d97881aed1875ce4bde2e56ffde0", - "sha256:26721fec65846b3e4450dad050d67d31b017f97e67f7e0647b5f98aa47f828cf", - "sha256:2740fd7161b39e178554ebf21aa5667a1c9ef0cd2cb74298fd4ef017dae7aec4", - "sha256:3152bbd3a4744cbdd83dfb210ed701838b8b0c9065cef14671d6d91df12197d0", - "sha256:35391e72df6c14a09b697c7b94384947c1dd326aca883ff98ff137acdf586c33", - "sha256:37e7edfa6cf96d036a403775c96afa25058d1bb940a79786a9a2fc94a783abe3", - "sha256:3ada31cb879cd7532f4a85b501f4255c747d4813ab76b35c49ed510ce4865b45", - "sha256:3eee4c676af1b109f708d80ef0cf57ecb8aaa5900d1edaf90406aea7e0e20e37", - "sha256:40bd7ebe4dbb37d27f0c56e2a844f360239343a99be422085e13e97da13f73f9", - "sha256:44c33ebd1c62a01db7fbc24e18bdda569d6639217d13d5929e986a2b0f69070d", - "sha256:450ec234736732eb0ebeffdb95a352450d4592f12c3e087e2a9183386d22c8bf", - "sha256:45f0fd82bad4d199fa993fbf0ac586a7ac5879addbe436a35a389df7e0eb4c91", - "sha256:47c17d5ebfa88ae90f08960c97b49917098665b8cd8be31f2c24e177bcf37a0f", - "sha256:48f496bbe14686b51cec15406323ae6942851e14022efd7fc0e2ecd092c5982c", - "sha256:4ef9a79a48794099c57dc2df00340b5d47c5caa1792f9ddb8c7a26b1280bd575", - "sha256:52589f0a745ef61b9c75c872cf91f8c1f7c0668eb3dd99d7abd639d8c0fb9ca7", - "sha256:55353b8189adcfc4c125fc4ce59d477744118e9c0ec379dd0999c5fa120ac4f5", - "sha256:5d6aea92bcccfe5e5524d3c70a6f16ffdae548390ddad26f4207d55c55a40593", - "sha256:5ff56c76ce77b9805378a7a73032c17cbdb1a5b84faa1df03c5d3e306e5616df", - "sha256:606b922699fcec472ed814dda4dc3ff7c748254e0b26762a0ba21a726eb1c107", - 
"sha256:6472a73bc115bc40a2076609a90894775abe6faf19a78375675a2f889a613071", - "sha256:653955c6c233e90de128a1b8e882abc7216f41f44218056bd519969c8c413a15", - "sha256:6926dd14cfe6967d3322640b6d5c3c3039db71716a5e43cca6e3b474e73e0b36", - "sha256:6a967681463aa7a99eb9a62bb18229b653b45c10ff0947b31cc0837a83dfb86f", - "sha256:6cb2e41af165e5f327d06fbdd79a42a4e930267fade4e9f92d17f3ccce03f3a7", - "sha256:6d03da3a0ae691b361edcb39530075461202f699ce05adbb15055a0e1c9bcaa4", - "sha256:6e8dd2961462e337e21092ec2da0c69d814dcb1b6e892955a37444a425e9cfb8", - "sha256:6f8e6bd5d066be605faa9fe5ec10aa1a46ad9f18fc8646f2b9aaefc8fb575742", - "sha256:70770e296a9cb03d955540c99360aab861cbb3cba29516abbd106a15dbd91268", - "sha256:7753c67c570d7fc80c2dc59b90ca1196f1224e0e2e29a548980c95fe0fe27fc1", - "sha256:7a15e5465e7083c12517209c9dd24722b25e9b63c49a563922922fc03554eb35", - "sha256:7bf51970b11d67096bede97cdbad0f4333f7664f4708b9b2acb352bf4faa3140", - "sha256:83ec17729cf6d3464dab98a11e98294fcd50e6b17eaabd3d841515c23f6dbd3a", - "sha256:86531e20de249d9204cc6d8b13d5a30537748c78820215161d8a3b9ea58ca111", - "sha256:867f55e54aff254940bcec5eec068e7c0ac1e6bf360ab91479394a8bf356b0e6", - "sha256:88896b1b4817d7b2fe1ec7205c4bbe07bf5d92fb249bf2d226ddea8761996068", - "sha256:8a98b3cb0484b83c19d8fb5524c8a469cd9f10e743f5904ac285d92678ee761f", - "sha256:8d4e51632e6b12e65e8d9d7612446ecda2eda637a868afa7bce16270194650dd", - "sha256:8da0ed8a598693731c76659880a668f4748b59158f26ed283a93f7f04d47447e", - "sha256:903cc7a84a7d4326b43755c368780800e035aa3d711deae84a533fdffa8755b0", - "sha256:949067079e14ea1973bd740255e0840118c163d4bce8837f539d749f145cf5c3", - "sha256:9bce298c1ce077837e110367c321285dc4246b531cde1abfc27e4a5bbe2bed4d", - "sha256:aa5f95d71b6eca9cec28aa0a2f8310ea53dea313b63db74932879ff860c1fb8d", - "sha256:ac04f904b4fce4afea9cdccbb78e24d468cb610a839d5a698853e14e2a3f9ecf", - "sha256:ac60a980f07fa988983f7bfe6404ef3f1e4303f5288a01713bc1266df6d18783", - "sha256:b4dbc033c5ad46f8c429bf238c25a889b8c1d86bfe23a74e1031a991cb3f0000", - "sha256:b7b6d2a46c7afe2ad03ec8faf9967090c8ceae85c4d8934d17d7cae6f9062b64", - "sha256:ba77fe84fe4f5f3dc0ef681a6d366685c8ffe1c8439c1d7530997b05ac06a04b", - "sha256:bab697fc1574fee4b81da955678708567c43c813c84c91074e452bda5346c921", - "sha256:bdbc7dab0b0e9c62c97b732899c4242e3282ba803bad668e03650b59b165466e", - "sha256:becd8d8fb068fbb5a52096efd83a2d8e54354383f691781f53a4c26aee944542", - "sha256:bf77601d75ca692c179154b7e5943c286a4aaffec02c491afe05e60493ce95f2", - "sha256:c40b09b7e184d6e3e1be1c8af2cc320c0f9f610d8a5df3dd866e6e6e4e32b235", - "sha256:c770a7545b3deca2db185b59175e710a820dd4ed43619f4c02e90b0e227c6252", - "sha256:c7cc8cc009e8f6989a6d86c96f87dae5f5fb07d6c96916cdc7719d546152c7db", - "sha256:d420d856bf728713874cefb911398efe69e1577835851dd297a308a78c14c249", - "sha256:d47175f0a380bfd051726bc5c0054036ae4a5d8caf922c62c8a172ccd95c1a2a", - "sha256:d509685d1cd1d018705a811c5f9d5bc237790936ead6d06f6558b77e16cc7235", - "sha256:d566e859e8b8d5bca08467c093061774924b3d78a5ba290e82735b2569edc84b", - "sha256:d6c38806f6ecd0acf3104b8d7e76a206bcf56dadd6ce03720d2fa9d9157d5718", - "sha256:d792d3cab987058451e55c70c5926e93e2ceb68ca5a2334863bb903eb860c9cb", - "sha256:dabf796c67aa9f5a4fcc956d47f0d48b5c1ed288d628cf53aa1cf08e88654343", - "sha256:e025f6351e49d48a5aa2f5a09293aa769b0ee7369c25bed551647234b7fa0c75", - "sha256:e74b6d5ef57bb65bf1b4a37453d8d86d88550dde3fb0f23b1f1a24e60c70af5b", - "sha256:e7aa61a9cc4f0523373e31fc9255bf4567185a099f85ca3598e64de484da3ab2", - "sha256:e8158ac8616941f874841f9fa0f6d2f1466178c2ff91ea08353fdc19de0d40c2", - 
"sha256:e84a3161149c75bb7a7dc8646384186c34033e286a67fec1ad1bdedea165e7f4", - "sha256:e943c39c206b04df2eb5d71305761d7c3ca75fd49452115ea92db1b5b98dbdef", - "sha256:ea2c6a53e28c7066ea7db86fcc0b71d78d01b818bb11d4a4341ec35059885295", - "sha256:ed4c6ee624ecbc77b18aeeb07bf0700d26571ab95b8f723f0d02e056b5bce438", - "sha256:ee53a8191271f144cc20b12c19daa9f1546adc84a2f33839e3338039b55c373c", - "sha256:f0f9bb370449158359bb72a3e12c658327670c0ffe6fbcd1af083152b64f9df0", - "sha256:f2133de5ba9adc5f481884ccb699eac9ce789708292945c05746880f95b241c0", - "sha256:f26a05029ecd2bd306b941ff8cb80f7620b7901421052bc429d238305b1cbf2f", - "sha256:f2aca15e9ad8c8657b5b3d7ae3d1724dc8c1c1059c06b4b674c3aa36305f4930", - "sha256:f43be2bebbd09360a2f23af83b243dc25ffe7b583ea8c722e6df03e03a55f02f", - "sha256:f5e3706c32dea077faa42b1c92d825b7f86c866f72532d342e0be5e64d14d858", - "sha256:f84e33321b68ff00b60e9dbd1a483e31ab6022c577c8de525b8e771bd274ce68", - "sha256:f961423ff6236a752ced80057a20e623044df95924ed1009f844cde8b3a595f9" + "sha256:01fbfbeb8249a68d257f601deb50c70c929dc2dfe683b754659569e502fbd3aa", + "sha256:0270b49b6847f0d106d64b5086e9ad5dc8a902413b5dbbb15d12b60f9c1747a4", + "sha256:03c0ae165e700364b266876d712acb1ac02693acd920afa67da2ebb91a0b3c09", + "sha256:068ca17214038ae986d68f4a7021f97e187ed278ab6dccb79f837d765a54d753", + "sha256:082a2988364b60bb5de809373098361cf1dbb239623e39e46cb18bc035ed9c0c", + "sha256:0aaf982e68a7ac284377d051c742610220fd06d330dcd4c4dbb4cdd77c22a537", + "sha256:0c0991f5a96a8e620f7691e61178cd8f457b49e17b7d9cfa2067e2a0a89fc1d5", + "sha256:115f8359402fa527cf47708d6f8a0f8234f0e9ca0cab7c18c9c189c194dbf620", + "sha256:15c59e780be8f30a60816a9adab900c12a58d79c1ac742b4a8df044ab2a6d920", + "sha256:1b7d0e124948daa4d9686d421ef5087c0516bc6179fdcf8828b8444f8e461a77", + "sha256:1c8eb19abe87029c18f226d42b8a2c9efdd139d08f8bf6e085dd9075446db450", + "sha256:204e0f176fd1d067671157d049466869b3ae1fc51e354708b0dc41cf94e23a3a", + "sha256:2136f64fbb86451dbbf70223635a468272dd20075f988a102bf8a3f194a411dc", + "sha256:2b291d1230845871c00c8462c50565a9cd6026fe1228e77ca934470bb7d70ea0", + "sha256:2c18645ef6294d99b256806e34653e86236eb266278c8ec8112622b61db255de", + "sha256:2cc4e280098c1b192c42a849de8de2c8e0f3a84086a76ec5b07bfee29bda7d18", + "sha256:2ed8357f4c6e0daa4f3baf31832df8a33334e0fe5b020a61bc8b345a3db7a606", + "sha256:3191d312c73e3cfd0f0afdf51df8405aafeb0bad71e7ed8f68b24b63c4f36500", + "sha256:3401613148d93ef0fd9aabdbddb212de3db7a4475367f49f590c837355343972", + "sha256:34106f68e20e6ff253c9f596ea50397dbd8699828d55e8fa18bd4323d8d966e6", + "sha256:3516119f4f9b8671083a70b6afaa0a070f5683e431ab3dc26e9215620d7ca1ad", + "sha256:38ece17ec5f20d7d9b442e5174ae9f020365d01ba7c112205a4d59cf19dc38ee", + "sha256:3b4032a96410bdc760061b14ed6a33613ffb7f702181ba999df5d16fb96ba16a", + "sha256:3bf8b000a4e2967e6dfdd8656cd0757d18c7e5ce3d16339e550bd462f4857e59", + "sha256:3e3070e680f79887d60feeda051a58d0ac36622e1759f305a41059eff62c6da7", + "sha256:4496b1282c70c442809fc1b151977c3d967bfb33e4e17cedbf226d97de18f709", + "sha256:44dd6fc3034f1eaa72ece33588867df9e006a7303725a12d64c3dff92330f625", + "sha256:4adfbb5451196842a88fda3612e2c0414134874bffb1c2ce83ab4242ec9e027d", + "sha256:4b7c0c0b3244bb2275abe255d4a30c050d541c6cb18b870975553f1fb6f37527", + "sha256:4c82a6d952a1d555bf4be42b6532927d2a5686dd3c3e280e5f63225ab47ac1f5", + "sha256:5344b896e79800af86ad643408ca9aa303a017f6ebff8cee5a3163c1e9aec987", + "sha256:5bde86a2ed3ce587fa2b207424ce15b9a83a9fa14422dcc1c5356a13aed3df9d", + "sha256:5bf6c237f8c681dfb91b17f8435b2735951f0d1fad10cc5dfd96db110243370b", + 
"sha256:5dbb9c997932473a27afa93954bb77a9f9b786b4ccf718d903f35da3232317de", + "sha256:69ea9d6d9baa25a4dc9cef5e2b77b8537827b122214f210dd925132e34ae9b12", + "sha256:6b3146f9ae6af82c47a5282ac8803523d381b3b21caeae0327ed2f7ecb718798", + "sha256:6bcb34f869d431799c3ee7d516554797f7760cb2198ecaa89c3f176f72d062be", + "sha256:6ca08b840fe95d1c2bd9ab92dac5685f949fc6f9ae820ec16193e5ddf603c3b2", + "sha256:6ca7a9a06b52d0e38ccf6bca1aeff7be178917893f3883f37b75589d42c4ac20", + "sha256:703c60b9910488d3d0954ca585c34f541e506a091a41930e663a098d3b794c67", + "sha256:715bdf952b9533ba13dfcf1f431a8f49e63cecc31d91d007bc1deb914f47d0e4", + "sha256:72b67f966b57dbd18dcc7efbc1c7fc9f5f983e572db1877081f075004614fcdd", + "sha256:74423631b6be371edfbf7eabb02ab995c2563fee60a80a30829176842e71722a", + "sha256:77a85dca4c2430ac04dc2a2185c2deb3858a34fe7f403d0a946fa56970cf60a1", + "sha256:7821d44fe07335bea256b9f1f41474a642ca55fa671dfd9f00af8d68a920c2d4", + "sha256:788f15721c64109cf720791714dc14afd0f449d63f3a5487724f024345067381", + "sha256:7ca684ee649b55fd8f378127ac8462fb6c85f251c2fb027eb3c887e8ee347bcd", + "sha256:7daa3e1369355766dea11f1d8ef829905c3b9da886ea3152788dc25ee6079e02", + "sha256:7e6bc96ebe49604df3ec2c6389cc3876cabe475e6bfc84ced1bf4e630662cb35", + "sha256:80b12f25d805a919d53efc0a5ad7c0c0326f13b4eae981a5d7b7cc343318ebb7", + "sha256:871587bdadd1075b112e697173e946a07d722459d20716ceb3d1bd6c64bd08ce", + "sha256:88b88282e55fa39dd556d7fc04160bcf39dea015f78e0cecec8ff4f06c1fc2b5", + "sha256:8d7a498671ca87e32b54cb47c82a92b40130a26c5197d392720a1bce1b3c77cf", + "sha256:926838a535c2c1ea21c903f909a9a54e675c2126728c21381a94ddf37c3cbddf", + "sha256:971e8990c5cc4ddcff26e149398fc7b0f6a042306e82500f5e8db3b10ce69f84", + "sha256:9b273ecfbc590a1b98f014ae41e5cf723932f3b53ba9367cfb676f838038b32c", + "sha256:a42db008d58530efa3b881eeee4991146de0b790e095f7ae43ba5cc612decbc5", + "sha256:a72a84570f84c374b4c287183debc776dc319d3e8ce6b6a0041ce2e400de3f32", + "sha256:ac97a21de3712afe6a6c071abfad40a6224fd14fa6ff0ff8d0c6e6cd4e2f807a", + "sha256:acb704195a71ac5ea5ecf2811c9ee19ecdc62b91878528302dd0be1b9451cc90", + "sha256:b32bff85fb02a75ea0b68f21e2412255b5731f3f389ed9aecc13a6752f58ac97", + "sha256:b3cd31f859b662ac5d7f4226ec7d8bd60384fa037fc02aee6ff0b53ba29a3ba8", + "sha256:b63731993cdddcc8e087c64e9cf003f909262b359110070183d7f3025d1c56b5", + "sha256:b6907da3017ef55139cf0e417c5123a84c7332520e73a6902ff1f79046cd3b94", + "sha256:ba6e5e6588e49139a0979d03a7deb9c734bde647b9a8808f26acf9c547cab1bf", + "sha256:c1c8f2a2ca45292084c75bb6d3a25545cff0ed931ed228d3a1810ae3758f975f", + "sha256:ce828058d482ef860746bf532822842e0ff484e27f540ef5c813d516dd8896d2", + "sha256:d0a2d1bd63a4ad79483049b26514e70fa618ce6115220da9efdff63688808b17", + "sha256:d0cdde3c78d8ab5b46595054e5def32a755fc028685add5ddc7403e9f6de9879", + "sha256:d57dfbf9737763b3a60d26e6800e02e04284926329aee8fb01049635e957fe81", + "sha256:d8416c23161abd94cc7da80c734ad7c9f5dbebdadfdaa77dad78244457448223", + "sha256:dba7d9f2e047dfa2bca3b01f4f84aa5246725203d6284e3790f2ca15fba6b40a", + "sha256:dbf012d8fcb9f2cf0643b65df3b355fdd74fc0035d70bb5c845e9e30a3a4654b", + "sha256:e1258c639e00bf5e8a522fec6c3eaa3e30cf1c23a2f21a586be7e04d50c9acab", + "sha256:e222562dc0f38571c8b1ffdae9d7adb866363134299264a1958d077800b193b7", + "sha256:e4946d6bdb7ba972dfda282f9127e5756d4f299028b1566d1245fa0d438847e6", + "sha256:e746524418b70f38550f2190eeee834db8850088c834d4c8406fbb9bc1ae10b2", + "sha256:e76654e9dbfb835b3518f9938e565c7806976c07b37c33526b574cc1a1050480", + "sha256:e8918973fbd34e7814f59143c5f600ecd38b8038161239fd1a3d33d5817a38b8", + 
"sha256:e891ce81edd463b3b4c3b885c5603c00141151dd9c6936d98a680c8c72fe5c67", + "sha256:ebbbd0e728af5db9b04e56389e2299a57ea8b9dd15c9759153ee2455b32be6ad", + "sha256:eeb438a26d87c123bb318e5f2b3d86a36060b01f22fbdffd8cf247d52f7c9a2b", + "sha256:eed56b6a39216d31ff8cd2f1d048b5bf1700e4b32a01b14379c3b6dde9ce3aa3", + "sha256:f17cde1db0754c35a91ac00b22b25c11da6eec5746431d6e5092f0cd31a3fea9", + "sha256:f1a9b7d00fdf60b4039f4455afd031fe85ee8305b019334b72dcf73c567edc47", + "sha256:f4b6cecbbf3b7380f3b61de3a7b93cb721125dc125c854c14ddc91225ba52f83", + "sha256:f6b1d1c631e5940cac5a0b22c5379c86e8df6a4ec277c7a856b714021ab6cfad", + "sha256:f6c21c00478a7bea93caaaef9e7629145d4153b15a8653e8bb4609d4bc70dbfc" ], "markers": "python_version >= '3.7'", - "version": "==26.0.2" + "version": "==26.0.3" }, "referencing": { "hashes": [ - "sha256:5773bd84ef41799a5a8ca72dc34590c041eb01bf9aa02632b4a973fb0181a844", - "sha256:d53ae300ceddd3169f1ffa9caf2cb7b769e92657e4fafb23d34b93679116dfd4" + "sha256:25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c", + "sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de" ], "markers": "python_version >= '3.8'", - "version": "==0.34.0" + "version": "==0.35.1" }, "requests": { "hashes": [ - "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", - "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1" + "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", + "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6" ], "index": "pypi", - "markers": "python_version >= '3.7'", - "version": "==2.31.0" + "markers": "python_version >= '3.8'", + "version": "==2.32.3" }, "requests-oauthlib": { "hashes": [ @@ -1522,115 +1650,127 @@ }, "rpds-py": { "hashes": [ - "sha256:01e36a39af54a30f28b73096dd39b6802eddd04c90dbe161c1b8dbe22353189f", - "sha256:044a3e61a7c2dafacae99d1e722cc2d4c05280790ec5a05031b3876809d89a5c", - "sha256:08231ac30a842bd04daabc4d71fddd7e6d26189406d5a69535638e4dcb88fe76", - "sha256:08f9ad53c3f31dfb4baa00da22f1e862900f45908383c062c27628754af2e88e", - "sha256:0ab39c1ba9023914297dd88ec3b3b3c3f33671baeb6acf82ad7ce883f6e8e157", - "sha256:0af039631b6de0397ab2ba16eaf2872e9f8fca391b44d3d8cac317860a700a3f", - "sha256:0b8612cd233543a3781bc659c731b9d607de65890085098986dfd573fc2befe5", - "sha256:11a8c85ef4a07a7638180bf04fe189d12757c696eb41f310d2426895356dcf05", - "sha256:1374f4129f9bcca53a1bba0bb86bf78325a0374577cf7e9e4cd046b1e6f20e24", - "sha256:1d4acf42190d449d5e89654d5c1ed3a4f17925eec71f05e2a41414689cda02d1", - "sha256:1d9a5be316c15ffb2b3c405c4ff14448c36b4435be062a7f578ccd8b01f0c4d8", - "sha256:1df3659d26f539ac74fb3b0c481cdf9d725386e3552c6fa2974f4d33d78e544b", - "sha256:22806714311a69fd0af9b35b7be97c18a0fc2826e6827dbb3a8c94eac6cf7eeb", - "sha256:2644e47de560eb7bd55c20fc59f6daa04682655c58d08185a9b95c1970fa1e07", - "sha256:2e6d75ab12b0bbab7215e5d40f1e5b738aa539598db27ef83b2ec46747df90e1", - "sha256:30f43887bbae0d49113cbaab729a112251a940e9b274536613097ab8b4899cf6", - "sha256:34b18ba135c687f4dac449aa5157d36e2cbb7c03cbea4ddbd88604e076aa836e", - "sha256:36b3ee798c58ace201289024b52788161e1ea133e4ac93fba7d49da5fec0ef9e", - "sha256:39514da80f971362f9267c600b6d459bfbbc549cffc2cef8e47474fddc9b45b1", - "sha256:39f5441553f1c2aed4de4377178ad8ff8f9d733723d6c66d983d75341de265ab", - "sha256:3a96e0c6a41dcdba3a0a581bbf6c44bb863f27c541547fb4b9711fd8cf0ffad4", - "sha256:3f26b5bd1079acdb0c7a5645e350fe54d16b17bfc5e71f371c449383d3342e17", - "sha256:41ef53e7c58aa4ef281da975f62c258950f54b76ec8e45941e93a3d1d8580594", - 
"sha256:42821446ee7a76f5d9f71f9e33a4fb2ffd724bb3e7f93386150b61a43115788d", - "sha256:43fbac5f22e25bee1d482c97474f930a353542855f05c1161fd804c9dc74a09d", - "sha256:4457a94da0d5c53dc4b3e4de1158bdab077db23c53232f37a3cb7afdb053a4e3", - "sha256:465a3eb5659338cf2a9243e50ad9b2296fa15061736d6e26240e713522b6235c", - "sha256:482103aed1dfe2f3b71a58eff35ba105289b8d862551ea576bd15479aba01f66", - "sha256:4832d7d380477521a8c1644bbab6588dfedea5e30a7d967b5fb75977c45fd77f", - "sha256:4901165d170a5fde6f589acb90a6b33629ad1ec976d4529e769c6f3d885e3e80", - "sha256:5307def11a35f5ae4581a0b658b0af8178c65c530e94893345bebf41cc139d33", - "sha256:5417558f6887e9b6b65b4527232553c139b57ec42c64570569b155262ac0754f", - "sha256:56a737287efecafc16f6d067c2ea0117abadcd078d58721f967952db329a3e5c", - "sha256:586f8204935b9ec884500498ccc91aa869fc652c40c093bd9e1471fbcc25c022", - "sha256:5b4e7d8d6c9b2e8ee2d55c90b59c707ca59bc30058269b3db7b1f8df5763557e", - "sha256:5ddcba87675b6d509139d1b521e0c8250e967e63b5909a7e8f8944d0f90ff36f", - "sha256:618a3d6cae6ef8ec88bb76dd80b83cfe415ad4f1d942ca2a903bf6b6ff97a2da", - "sha256:635dc434ff724b178cb192c70016cc0ad25a275228f749ee0daf0eddbc8183b1", - "sha256:661d25cbffaf8cc42e971dd570d87cb29a665f49f4abe1f9e76be9a5182c4688", - "sha256:66e6a3af5a75363d2c9a48b07cb27c4ea542938b1a2e93b15a503cdfa8490795", - "sha256:67071a6171e92b6da534b8ae326505f7c18022c6f19072a81dcf40db2638767c", - "sha256:685537e07897f173abcf67258bee3c05c374fa6fff89d4c7e42fb391b0605e98", - "sha256:69e64831e22a6b377772e7fb337533c365085b31619005802a79242fee620bc1", - "sha256:6b0817e34942b2ca527b0e9298373e7cc75f429e8da2055607f4931fded23e20", - "sha256:6c81e5f372cd0dc5dc4809553d34f832f60a46034a5f187756d9b90586c2c307", - "sha256:6d7faa6f14017c0b1e69f5e2c357b998731ea75a442ab3841c0dbbbfe902d2c4", - "sha256:6ef0befbb5d79cf32d0266f5cff01545602344eda89480e1dd88aca964260b18", - "sha256:6ef687afab047554a2d366e112dd187b62d261d49eb79b77e386f94644363294", - "sha256:7223a2a5fe0d217e60a60cdae28d6949140dde9c3bcc714063c5b463065e3d66", - "sha256:77f195baa60a54ef9d2de16fbbfd3ff8b04edc0c0140a761b56c267ac11aa467", - "sha256:793968759cd0d96cac1e367afd70c235867831983f876a53389ad869b043c948", - "sha256:7bd339195d84439cbe5771546fe8a4e8a7a045417d8f9de9a368c434e42a721e", - "sha256:7cd863afe7336c62ec78d7d1349a2f34c007a3cc6c2369d667c65aeec412a5b1", - "sha256:7f2facbd386dd60cbbf1a794181e6aa0bd429bd78bfdf775436020172e2a23f0", - "sha256:84ffab12db93b5f6bad84c712c92060a2d321b35c3c9960b43d08d0f639d60d7", - "sha256:8c8370641f1a7f0e0669ddccca22f1da893cef7628396431eb445d46d893e5cd", - "sha256:8db715ebe3bb7d86d77ac1826f7d67ec11a70dbd2376b7cc214199360517b641", - "sha256:8e8916ae4c720529e18afa0b879473049e95949bf97042e938530e072fde061d", - "sha256:8f03bccbd8586e9dd37219bce4d4e0d3ab492e6b3b533e973fa08a112cb2ffc9", - "sha256:8f2fc11e8fe034ee3c34d316d0ad8808f45bc3b9ce5857ff29d513f3ff2923a1", - "sha256:923d39efa3cfb7279a0327e337a7958bff00cc447fd07a25cddb0a1cc9a6d2da", - "sha256:93df1de2f7f7239dc9cc5a4a12408ee1598725036bd2dedadc14d94525192fc3", - "sha256:998e33ad22dc7ec7e030b3df701c43630b5bc0d8fbc2267653577e3fec279afa", - "sha256:99f70b740dc04d09e6b2699b675874367885217a2e9f782bdf5395632ac663b7", - "sha256:9a00312dea9310d4cb7dbd7787e722d2e86a95c2db92fbd7d0155f97127bcb40", - "sha256:9d54553c1136b50fd12cc17e5b11ad07374c316df307e4cfd6441bea5fb68496", - "sha256:9dbbeb27f4e70bfd9eec1be5477517365afe05a9b2c441a0b21929ee61048124", - "sha256:a1ce3ba137ed54f83e56fb983a5859a27d43a40188ba798993812fed73c70836", - "sha256:a34d557a42aa28bd5c48a023c570219ba2593bcbbb8dc1b98d8cf5d529ab1434", - 
"sha256:a5f446dd5055667aabaee78487f2b5ab72e244f9bc0b2ffebfeec79051679984", - "sha256:ad36cfb355e24f1bd37cac88c112cd7730873f20fb0bdaf8ba59eedf8216079f", - "sha256:aec493917dd45e3c69d00a8874e7cbed844efd935595ef78a0f25f14312e33c6", - "sha256:b316144e85316da2723f9d8dc75bada12fa58489a527091fa1d5a612643d1a0e", - "sha256:b34ae4636dfc4e76a438ab826a0d1eed2589ca7d9a1b2d5bb546978ac6485461", - "sha256:b34b7aa8b261c1dbf7720b5d6f01f38243e9b9daf7e6b8bc1fd4657000062f2c", - "sha256:bc362ee4e314870a70f4ae88772d72d877246537d9f8cb8f7eacf10884862432", - "sha256:bed88b9a458e354014d662d47e7a5baafd7ff81c780fd91584a10d6ec842cb73", - "sha256:c0013fe6b46aa496a6749c77e00a3eb07952832ad6166bd481c74bda0dcb6d58", - "sha256:c0b5dcf9193625afd8ecc92312d6ed78781c46ecbf39af9ad4681fc9f464af88", - "sha256:c4325ff0442a12113a6379af66978c3fe562f846763287ef66bdc1d57925d337", - "sha256:c463ed05f9dfb9baebef68048aed8dcdc94411e4bf3d33a39ba97e271624f8f7", - "sha256:c8362467a0fdeccd47935f22c256bec5e6abe543bf0d66e3d3d57a8fb5731863", - "sha256:cd5bf1af8efe569654bbef5a3e0a56eca45f87cfcffab31dd8dde70da5982475", - "sha256:cf1ea2e34868f6fbf070e1af291c8180480310173de0b0c43fc38a02929fc0e3", - "sha256:d62dec4976954a23d7f91f2f4530852b0c7608116c257833922a896101336c51", - "sha256:d68c93e381010662ab873fea609bf6c0f428b6d0bb00f2c6939782e0818d37bf", - "sha256:d7c36232a90d4755b720fbd76739d8891732b18cf240a9c645d75f00639a9024", - "sha256:dd18772815d5f008fa03d2b9a681ae38d5ae9f0e599f7dda233c439fcaa00d40", - "sha256:ddc2f4dfd396c7bfa18e6ce371cba60e4cf9d2e5cdb71376aa2da264605b60b9", - "sha256:e003b002ec72c8d5a3e3da2989c7d6065b47d9eaa70cd8808b5384fbb970f4ec", - "sha256:e32a92116d4f2a80b629778280103d2a510a5b3f6314ceccd6e38006b5e92dcb", - "sha256:e4461d0f003a0aa9be2bdd1b798a041f177189c1a0f7619fe8c95ad08d9a45d7", - "sha256:e541ec6f2ec456934fd279a3120f856cd0aedd209fc3852eca563f81738f6861", - "sha256:e546e768d08ad55b20b11dbb78a745151acbd938f8f00d0cfbabe8b0199b9880", - "sha256:ea7d4a99f3b38c37eac212dbd6ec42b7a5ec51e2c74b5d3223e43c811609e65f", - "sha256:ed4eb745efbff0a8e9587d22a84be94a5eb7d2d99c02dacf7bd0911713ed14dd", - "sha256:f8a2f084546cc59ea99fda8e070be2fd140c3092dc11524a71aa8f0f3d5a55ca", - "sha256:fcb25daa9219b4cf3a0ab24b0eb9a5cc8949ed4dc72acb8fa16b7e1681aa3c58", - "sha256:fdea4952db2793c4ad0bdccd27c1d8fdd1423a92f04598bc39425bcc2b8ee46e" - ], - "markers": "python_version >= '3.8'", - "version": "==0.18.0" + "sha256:01227f8b3e6c8961490d869aa65c99653df80d2f0a7fde8c64ebddab2b9b02fd", + "sha256:08ce9c95a0b093b7aec75676b356a27879901488abc27e9d029273d280438505", + "sha256:0b02dd77a2de6e49078c8937aadabe933ceac04b41c5dde5eca13a69f3cf144e", + "sha256:0d4b52811dcbc1aba08fd88d475f75b4f6db0984ba12275d9bed1a04b2cae9b5", + "sha256:13e6d4840897d4e4e6b2aa1443e3a8eca92b0402182aafc5f4ca1f5e24f9270a", + "sha256:1a129c02b42d46758c87faeea21a9f574e1c858b9f358b6dd0bbd71d17713175", + "sha256:1a8dfa125b60ec00c7c9baef945bb04abf8ac772d8ebefd79dae2a5f316d7850", + "sha256:1c32e41de995f39b6b315d66c27dea3ef7f7c937c06caab4c6a79a5e09e2c415", + "sha256:1d494887d40dc4dd0d5a71e9d07324e5c09c4383d93942d391727e7a40ff810b", + "sha256:1d4af2eb520d759f48f1073ad3caef997d1bfd910dc34e41261a595d3f038a94", + "sha256:1fb93d3486f793d54a094e2bfd9cd97031f63fcb5bc18faeb3dd4b49a1c06523", + "sha256:24f8ae92c7fae7c28d0fae9b52829235df83f34847aa8160a47eb229d9666c7b", + "sha256:24fc5a84777cb61692d17988989690d6f34f7f95968ac81398d67c0d0994a897", + "sha256:26ab43b6d65d25b1a333c8d1b1c2f8399385ff683a35ab5e274ba7b8bb7dc61c", + "sha256:271accf41b02687cef26367c775ab220372ee0f4925591c6796e7c148c50cab5", + 
"sha256:2ddd50f18ebc05ec29a0d9271e9dbe93997536da3546677f8ca00b76d477680c", + "sha256:31dd5794837f00b46f4096aa8ccaa5972f73a938982e32ed817bb520c465e520", + "sha256:31e450840f2f27699d014cfc8865cc747184286b26d945bcea6042bb6aa4d26e", + "sha256:32e0db3d6e4f45601b58e4ac75c6f24afbf99818c647cc2066f3e4b192dabb1f", + "sha256:346557f5b1d8fd9966059b7a748fd79ac59f5752cd0e9498d6a40e3ac1c1875f", + "sha256:34bca66e2e3eabc8a19e9afe0d3e77789733c702c7c43cd008e953d5d1463fde", + "sha256:3511f6baf8438326e351097cecd137eb45c5f019944fe0fd0ae2fea2fd26be39", + "sha256:35af5e4d5448fa179fd7fff0bba0fba51f876cd55212f96c8bbcecc5c684ae5c", + "sha256:3837c63dd6918a24de6c526277910e3766d8c2b1627c500b155f3eecad8fad65", + "sha256:39d67896f7235b2c886fb1ee77b1491b77049dcef6fbf0f401e7b4cbed86bbd4", + "sha256:3b823be829407393d84ee56dc849dbe3b31b6a326f388e171555b262e8456cc1", + "sha256:3c73254c256081704dba0a333457e2fb815364018788f9b501efe7c5e0ada401", + "sha256:3ddab996807c6b4227967fe1587febade4e48ac47bb0e2d3e7858bc621b1cace", + "sha256:3e1dc59a5e7bc7f44bd0c048681f5e05356e479c50be4f2c1a7089103f1621d5", + "sha256:4383beb4a29935b8fa28aca8fa84c956bf545cb0c46307b091b8d312a9150e6a", + "sha256:4cc4bc73e53af8e7a42c8fd7923bbe35babacfa7394ae9240b3430b5dcf16b2a", + "sha256:4dd02e29c8cbed21a1875330b07246b71121a1c08e29f0ee3db5b4cfe16980c4", + "sha256:4f580ae79d0b861dfd912494ab9d477bea535bfb4756a2269130b6607a21802e", + "sha256:53dbc35808c6faa2ce3e48571f8f74ef70802218554884787b86a30947842a14", + "sha256:56313be667a837ff1ea3508cebb1ef6681d418fa2913a0635386cf29cff35165", + "sha256:57863d16187995c10fe9cf911b897ed443ac68189179541734502353af33e693", + "sha256:5953391af1405f968eb5701ebbb577ebc5ced8d0041406f9052638bafe52209d", + "sha256:5beffdbe766cfe4fb04f30644d822a1080b5359df7db3a63d30fa928375b2720", + "sha256:5e360188b72f8080fefa3adfdcf3618604cc8173651c9754f189fece068d2a45", + "sha256:5e58b61dcbb483a442c6239c3836696b79f2cd8e7eec11e12155d3f6f2d886d1", + "sha256:69084fd29bfeff14816666c93a466e85414fe6b7d236cfc108a9c11afa6f7301", + "sha256:6d1d7539043b2b31307f2c6c72957a97c839a88b2629a348ebabe5aa8b626d6b", + "sha256:6d8b735c4d162dc7d86a9cf3d717f14b6c73637a1f9cd57fe7e61002d9cb1972", + "sha256:6ea961a674172ed2235d990d7edf85d15d8dfa23ab8575e48306371c070cda67", + "sha256:71157f9db7f6bc6599a852852f3389343bea34315b4e6f109e5cbc97c1fb2963", + "sha256:720f3108fb1bfa32e51db58b832898372eb5891e8472a8093008010911e324c5", + "sha256:74129d5ffc4cde992d89d345f7f7d6758320e5d44a369d74d83493429dad2de5", + "sha256:747251e428406b05fc86fee3904ee19550c4d2d19258cef274e2151f31ae9d38", + "sha256:75130df05aae7a7ac171b3b5b24714cffeabd054ad2ebc18870b3aa4526eba23", + "sha256:7b3661e6d4ba63a094138032c1356d557de5b3ea6fd3cca62a195f623e381c76", + "sha256:7d5c7e32f3ee42f77d8ff1a10384b5cdcc2d37035e2e3320ded909aa192d32c3", + "sha256:8124101e92c56827bebef084ff106e8ea11c743256149a95b9fd860d3a4f331f", + "sha256:81db2e7282cc0487f500d4db203edc57da81acde9e35f061d69ed983228ffe3b", + "sha256:840e18c38098221ea6201f091fc5d4de6128961d2930fbbc96806fb43f69aec1", + "sha256:89cc8921a4a5028d6dd388c399fcd2eef232e7040345af3d5b16c04b91cf3c7e", + "sha256:8b32cd4ab6db50c875001ba4f5a6b30c0f42151aa1fbf9c2e7e3674893fb1dc4", + "sha256:8df1c283e57c9cb4d271fdc1875f4a58a143a2d1698eb0d6b7c0d7d5f49c53a1", + "sha256:902cf4739458852fe917104365ec0efbea7d29a15e4276c96a8d33e6ed8ec137", + "sha256:97fbb77eaeb97591efdc654b8b5f3ccc066406ccfb3175b41382f221ecc216e8", + "sha256:9c7042488165f7251dc7894cd533a875d2875af6d3b0e09eda9c4b334627ad1c", + "sha256:9e318e6786b1e750a62f90c6f7fa8b542102bdcf97c7c4de2a48b50b61bd36ec", + 
"sha256:a9421b23c85f361a133aa7c5e8ec757668f70343f4ed8fdb5a4a14abd5437244", + "sha256:aaf71f95b21f9dc708123335df22e5a2fef6307e3e6f9ed773b2e0938cc4d491", + "sha256:afedc35fe4b9e30ab240b208bb9dc8938cb4afe9187589e8d8d085e1aacb8309", + "sha256:b5e28e56143750808c1c79c70a16519e9bc0a68b623197b96292b21b62d6055c", + "sha256:b82c9514c6d74b89a370c4060bdb80d2299bc6857e462e4a215b4ef7aa7b090e", + "sha256:b8f78398e67a7227aefa95f876481485403eb974b29e9dc38b307bb6eb2315ea", + "sha256:bbda75f245caecff8faa7e32ee94dfaa8312a3367397975527f29654cd17a6ed", + "sha256:bca34e913d27401bda2a6f390d0614049f5a95b3b11cd8eff80fe4ec340a1208", + "sha256:bd04d8cab16cab5b0a9ffc7d10f0779cf1120ab16c3925404428f74a0a43205a", + "sha256:c149a652aeac4902ecff2dd93c3b2681c608bd5208c793c4a99404b3e1afc87c", + "sha256:c2087dbb76a87ec2c619253e021e4fb20d1a72580feeaa6892b0b3d955175a71", + "sha256:c34f751bf67cab69638564eee34023909380ba3e0d8ee7f6fe473079bf93f09b", + "sha256:c6d20c8896c00775e6f62d8373aba32956aa0b850d02b5ec493f486c88e12859", + "sha256:c7af6f7b80f687b33a4cdb0a785a5d4de1fb027a44c9a049d8eb67d5bfe8a687", + "sha256:c7b07959866a6afb019abb9564d8a55046feb7a84506c74a6f197cbcdf8a208e", + "sha256:ca0dda0c5715efe2ab35bb83f813f681ebcd2840d8b1b92bfc6fe3ab382fae4a", + "sha256:cdb7eb3cf3deb3dd9e7b8749323b5d970052711f9e1e9f36364163627f96da58", + "sha256:ce757c7c90d35719b38fa3d4ca55654a76a40716ee299b0865f2de21c146801c", + "sha256:d1fa67ef839bad3815124f5f57e48cd50ff392f4911a9f3cf449d66fa3df62a5", + "sha256:d2dbd8f4990d4788cb122f63bf000357533f34860d269c1a8e90ae362090ff3a", + "sha256:d4ec0046facab83012d821b33cead742a35b54575c4edfb7ed7445f63441835f", + "sha256:dbceedcf4a9329cc665452db1aaf0845b85c666e4885b92ee0cddb1dbf7e052a", + "sha256:dc733d35f861f8d78abfaf54035461e10423422999b360966bf1c443cbc42705", + "sha256:dd635c2c4043222d80d80ca1ac4530a633102a9f2ad12252183bcf338c1b9474", + "sha256:de1f7cd5b6b351e1afd7568bdab94934d656abe273d66cda0ceea43bbc02a0c2", + "sha256:df7c841813f6265e636fe548a49664c77af31ddfa0085515326342a751a6ba51", + "sha256:e0f9d268b19e8f61bf42a1da48276bcd05f7ab5560311f541d22557f8227b866", + "sha256:e2d66eb41ffca6cc3c91d8387509d27ba73ad28371ef90255c50cb51f8953301", + "sha256:e429fc517a1c5e2a70d576077231538a98d59a45dfc552d1ac45a132844e6dfb", + "sha256:e4d2b88efe65544a7d5121b0c3b003ebba92bfede2ea3577ce548b69c5235185", + "sha256:e76c902d229a3aa9d5ceb813e1cbcc69bf5bda44c80d574ff1ac1fa3136dea71", + "sha256:ef07a0a1d254eeb16455d839cef6e8c2ed127f47f014bbda64a58b5482b6c836", + "sha256:f09529d2332264a902688031a83c19de8fda5eb5881e44233286b9c9ec91856d", + "sha256:f0a6d4a93d2a05daec7cb885157c97bbb0be4da739d6f9dfb02e101eb40921cd", + "sha256:f0cf2a0dbb5987da4bd92a7ca727eadb225581dd9681365beba9accbe5308f7d", + "sha256:f2671cb47e50a97f419a02cd1e0c339b31de017b033186358db92f4d8e2e17d8", + "sha256:f35b34a5184d5e0cc360b61664c1c06e866aab077b5a7c538a3e20c8fcdbf90b", + "sha256:f3d73022990ab0c8b172cce57c69fd9a89c24fd473a5e79cbce92df87e3d9c48", + "sha256:f5b8353ea1a4d7dfb59a7f45c04df66ecfd363bb5b35f33b11ea579111d4655f", + "sha256:f809a17cc78bd331e137caa25262b507225854073fd319e987bd216bed911b7c", + "sha256:f9bc4161bd3b970cd6a6fcda70583ad4afd10f2750609fb1f3ca9505050d4ef3", + "sha256:fdf4890cda3b59170009d012fca3294c00140e7f2abe1910e6a730809d0f3f9b" + ], + "markers": "python_version >= '3.8'", + "version": "==0.19.1" + }, + "rsa": { + "hashes": [ + "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7", + "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21" + ], + "markers": "python_version >= '3.6' and python_version < '4'", + 
"version": "==4.9" }, "schema": { "hashes": [ - "sha256:f06717112c61895cabc4707752b88716e8420a8819d71404501e114f91043197", - "sha256:f3ffdeeada09ec34bf40d7d79996d9f7175db93b7a5065de0faa7f41083c1e6c" + "sha256:5d976a5b50f36e74e2157b47097b60002bd4d42e65425fcc9c9befadb4255dde", + "sha256:7da553abd2958a19dc2547c388cde53398b39196175a9be59ea1caf5ab0a1807" ], - "version": "==0.7.5" + "version": "==0.7.7" }, "seaborn": { "hashes": [ @@ -1649,6 +1789,14 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", "version": "==1.8.3" }, + "setuptools": { + "hashes": [ + "sha256:5a03e1860cf56bb6ef48ce186b0e557fdba433237481a9a625176c2831be15d1", + "sha256:8d243eff56d095e5817f796ede6ae32941278f542e0f941867cc05ae52b162ec" + ], + "markers": "python_version >= '3.8'", + "version": "==72.1.0" + }, "six": { "hashes": [ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", @@ -1657,6 +1805,14 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, + "smmap": { + "hashes": [ + "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62", + "sha256:e6d8668fa5f93e706934a62d7b4db19c8d9eb8cf2adbb75ef1b675aa332b69da" + ], + "markers": "python_version >= '3.7'", + "version": "==5.0.1" + }, "sniffio": { "hashes": [ "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", @@ -1682,11 +1838,11 @@ }, "tenacity": { "hashes": [ - "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a", - "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c" + "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b", + "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539" ], - "markers": "python_version >= '3.7'", - "version": "==8.2.3" + "markers": "python_version >= '3.8'", + "version": "==9.0.0" }, "terminado": { "hashes": [ @@ -1698,36 +1854,36 @@ }, "tinycss2": { "hashes": [ - "sha256:2b80a96d41e7c3914b8cda8bc7f705a4d9c49275616e886103dd839dfc847847", - "sha256:8cff3a8f066c2ec677c06dbc7b45619804a6938478d9d73c284b29d14ecb0627" + "sha256:152f9acabd296a8375fbca5b84c961ff95971fcfc32e79550c8df8e29118c54d", + "sha256:54a8dbdffb334d536851be0226030e9505965bb2f30f21a4a82c55fb2a80fae7" ], - "markers": "python_version >= '3.7'", - "version": "==1.2.1" + "markers": "python_version >= '3.8'", + "version": "==1.3.0" }, "tornado": { "hashes": [ - "sha256:02ccefc7d8211e5a7f9e8bc3f9e5b0ad6262ba2fbb683a6443ecc804e5224ce0", - "sha256:10aeaa8006333433da48dec9fe417877f8bcc21f48dda8d661ae79da357b2a63", - "sha256:27787de946a9cffd63ce5814c33f734c627a87072ec7eed71f7fc4417bb16263", - "sha256:6f8a6c77900f5ae93d8b4ae1196472d0ccc2775cc1dfdc9e7727889145c45052", - "sha256:71ddfc23a0e03ef2df1c1397d859868d158c8276a0603b96cf86892bff58149f", - "sha256:72291fa6e6bc84e626589f1c29d90a5a6d593ef5ae68052ee2ef000dfd273dee", - "sha256:88b84956273fbd73420e6d4b8d5ccbe913c65d31351b4c004ae362eba06e1f78", - "sha256:e43bc2e5370a6a8e413e1e1cd0c91bedc5bd62a74a532371042a18ef19e10579", - "sha256:f0251554cdd50b4b44362f73ad5ba7126fc5b2c2895cc62b14a1c2d7ea32f212", - "sha256:f7894c581ecdcf91666a0912f18ce5e757213999e183ebfc2c3fdbf4d5bd764e", - "sha256:fd03192e287fbd0899dd8f81c6fb9cbbc69194d2074b38f384cb6fa72b80e9c2" + "sha256:163b0aafc8e23d8cdc3c9dfb24c5368af84a81e3364745ccb4427669bf84aec8", + "sha256:25486eb223babe3eed4b8aecbac33b37e3dd6d776bc730ca14e1bf93888b979f", + "sha256:454db8a7ecfcf2ff6042dde58404164d969b6f5d58b926da15e6b23817950fc4", + 
"sha256:613bf4ddf5c7a95509218b149b555621497a6cc0d46ac341b30bd9ec19eac7f3", + "sha256:6d5ce3437e18a2b66fbadb183c1d3364fb03f2be71299e7d10dbeeb69f4b2a14", + "sha256:8ae50a504a740365267b2a8d1a90c9fbc86b780a39170feca9bcc1787ff80842", + "sha256:92d3ab53183d8c50f8204a51e6f91d18a15d5ef261e84d452800d4ff6fc504e9", + "sha256:a02a08cc7a9314b006f653ce40483b9b3c12cda222d6a46d4ac63bb6c9057698", + "sha256:b24b8982ed444378d7f21d563f4180a2de31ced9d8d84443907a0a64da2072e7", + "sha256:d9a566c40b89757c9aa8e6f032bcdb8ca8795d7c1a9762910c722b1635c9de4d", + "sha256:e2e20b9113cd7293f164dc46fffb13535266e713cdb87bd2d15ddb336e96cfc4" ], "markers": "python_version >= '3.8'", - "version": "==6.4" + "version": "==6.4.1" }, "tqdm": { "hashes": [ - "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9", - "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531" + "sha256:b75ca56b413b030bc3f00af51fd2c1a1a5eac6a0c1cca83cbb37a5c52abce644", + "sha256:e4d936c9de8727928f3be6079590e97d9abfe8d39a590be678eb5919ffc186bb" ], "markers": "python_version >= '3.7'", - "version": "==4.66.2" + "version": "==4.66.4" }, "traitlets": { "hashes": [ @@ -1747,11 +1903,11 @@ }, "typing-extensions": { "hashes": [ - "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0", - "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a" + "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", + "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8" ], "markers": "python_version < '3.12'", - "version": "==4.11.0" + "version": "==4.12.2" }, "tzdata": { "hashes": [ @@ -1768,14 +1924,22 @@ ], "version": "==1.3.0" }, + "uritemplate": { + "hashes": [ + "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0", + "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e" + ], + "markers": "python_version >= '3.6'", + "version": "==4.1.1" + }, "urllib3": { "hashes": [ - "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d", - "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19" + "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472", + "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==2.2.1" + "version": "==2.2.2" }, "wcwidth": { "hashes": [ @@ -1786,10 +1950,10 @@ }, "webcolors": { "hashes": [ - "sha256:29bc7e8752c0a1bd4a1f03c14d6e6a72e93d82193738fa860cbff59d0fcc11bf", - "sha256:c225b674c83fa923be93d235330ce0300373d02885cef23238813b0d5668304a" + "sha256:1d160d1de46b3e81e58d0a280d0c78b467dc80f47294b91b1ad8029d2cedb55b", + "sha256:8cf5bc7e28defd1d48b9e83d5fc30741328305a8195c29a8e668fa45586568a1" ], - "version": "==1.13" + "version": "==24.6.0" }, "webencodings": { "hashes": [ @@ -1800,11 +1964,11 @@ }, "websocket-client": { "hashes": [ - "sha256:10e511ea3a8c744631d3bd77e61eb17ed09304c413ad42cf6ddfa4c7787e8fe6", - "sha256:f4c3d22fec12a2461427a29957ff07d35098ee2d976d3ba244e688b8b4057588" + "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", + "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da" ], "markers": "python_version >= '3.8'", - "version": "==1.7.0" + "version": "==1.8.0" }, "wordcloud": { "hashes": [ @@ -1891,32 +2055,32 @@ "jupyter" ], "hashes": [ - "sha256:1bb9ca06e556a09f7f7177bc7cb604e5ed2d2df1e9119e4f7d2f1f7071c32e5d", - "sha256:21f9407063ec71c5580b8ad975653c66508d6a9f57bd008bb8691d273705adcd", - 
"sha256:4396ca365a4310beef84d446ca5016f671b10f07abdba3e4e4304218d2c71d33", - "sha256:44d99dfdf37a2a00a6f7a8dcbd19edf361d056ee51093b2445de7ca09adac965", - "sha256:5cd5b4f76056cecce3e69b0d4c228326d2595f506797f40b9233424e2524c070", - "sha256:64578cf99b6b46a6301bc28bdb89f9d6f9b592b1c5837818a177c98525dbe397", - "sha256:64e60a7edd71fd542a10a9643bf369bfd2644de95ec71e86790b063aa02ff745", - "sha256:652e55bb722ca026299eb74e53880ee2315b181dfdd44dca98e43448620ddec1", - "sha256:6644f97a7ef6f401a150cca551a1ff97e03c25d8519ee0bbc9b0058772882665", - "sha256:6ad001a9ddd9b8dfd1b434d566be39b1cd502802c8d38bbb1ba612afda2ef436", - "sha256:71d998b73c957444fb7c52096c3843875f4b6b47a54972598741fe9a7f737fcb", - "sha256:74eb9b5420e26b42c00a3ff470dc0cd144b80a766128b1771d07643165e08d0e", - "sha256:75a2d0b4f5eb81f7eebc31f788f9830a6ce10a68c91fbe0fade34fff7a2836e6", - "sha256:7852b05d02b5b9a8c893ab95863ef8986e4dda29af80bbbda94d7aee1abf8702", - "sha256:7f2966b9b2b3b7104fca9d75b2ee856fe3fdd7ed9e47c753a4bb1a675f2caab8", - "sha256:8e5537f456a22cf5cfcb2707803431d2feeb82ab3748ade280d6ccd0b40ed2e8", - "sha256:d4e71cdebdc8efeb6deaf5f2deb28325f8614d48426bed118ecc2dcaefb9ebf3", - "sha256:dae79397f367ac8d7adb6c779813328f6d690943f64b32983e896bcccd18cbad", - "sha256:e3a3a092b8b756c643fe45f4624dbd5a389f770a4ac294cf4d0fce6af86addaf", - "sha256:eb949f56a63c5e134dfdca12091e98ffb5fd446293ebae123d10fc1abad00b9e", - "sha256:f07b69fda20578367eaebbd670ff8fc653ab181e1ff95d84497f9fa20e7d0641", - "sha256:f95cece33329dc4aa3b0e1a771c41075812e46cf3d6e3f1dfe3d91ff09826ed2" + "sha256:257d724c2c9b1660f353b36c802ccece186a30accc7742c176d29c146df6e474", + "sha256:37aae07b029fa0174d39daf02748b379399b909652a806e5708199bd93899da1", + "sha256:415e686e87dbbe6f4cd5ef0fbf764af7b89f9057b97c908742b6008cc554b9c0", + "sha256:48a85f2cb5e6799a9ef05347b476cce6c182d6c71ee36925a6c194d074336ef8", + "sha256:7768a0dbf16a39aa5e9a3ded568bb545c8c2727396d063bbaf847df05b08cd96", + "sha256:7e122b1c4fb252fd85df3ca93578732b4749d9be076593076ef4d07a0233c3e1", + "sha256:88c57dc656038f1ab9f92b3eb5335ee9b021412feaa46330d5eba4e51fe49b04", + "sha256:8e537d281831ad0e71007dcdcbe50a71470b978c453fa41ce77186bbe0ed6021", + "sha256:98e123f1d5cfd42f886624d84464f7756f60ff6eab89ae845210631714f6db94", + "sha256:accf49e151c8ed2c0cdc528691838afd217c50412534e876a19270fea1e28e2d", + "sha256:b1530ae42e9d6d5b670a34db49a94115a64596bc77710b1d05e9801e62ca0a7c", + "sha256:b9176b9832e84308818a99a561e90aa479e73c523b3f77afd07913380ae2eab7", + "sha256:bdde6f877a18f24844e381d45e9947a49e97933573ac9d4345399be37621e26c", + "sha256:be8bef99eb46d5021bf053114442914baeb3649a89dc5f3a555c88737e5e98fc", + "sha256:bf10f7310db693bb62692609b397e8d67257c55f949abde4c67f9cc574492cc7", + "sha256:c872b53057f000085da66a19c55d68f6f8ddcac2642392ad3a355878406fbd4d", + "sha256:d36ed1124bb81b32f8614555b34cc4259c3fbc7eec17870e8ff8ded335b58d8c", + "sha256:da33a1a5e49c4122ccdfd56cd021ff1ebc4a1ec4e2d01594fef9b6f267a9e741", + "sha256:dd1b5a14e417189db4c7b64a6540f31730713d173f0b63e55fabd52d61d8fdce", + "sha256:e151054aa00bad1f4e1f04919542885f89f5f7d086b8a59e5000e6c616896ffb", + "sha256:eaea3008c281f1038edb473c1aa8ed8143a5535ff18f978a318f10302b254063", + "sha256:ef703f83fc32e131e9bcc0a5094cfe85599e7109f896fe8bc96cc402f3eb4b6e" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==24.4.0" + "version": "==24.4.2" }, "cfgv": { "hashes": [ @@ -1959,36 +2123,36 @@ }, "filelock": { "hashes": [ - "sha256:404e5e9253aa60ad457cae1be07c0f0ca90a63931200a47d9b6a6af84fd7b45f", - 
"sha256:d13f466618bfde72bd2c18255e269f72542c6e70e7bac83a0232d6b1cc5c8cf4" + "sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb", + "sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7" ], "markers": "python_version >= '3.8'", - "version": "==3.13.4" + "version": "==3.15.4" }, "flake8": { "hashes": [ - "sha256:33f96621059e65eec474169085dc92bf26e7b2d47366b70be2f67ab80dc25132", - "sha256:a6dfbb75e03252917f2473ea9653f7cd799c3064e54d4c8140044c5c065f53c3" + "sha256:2e416edcc62471a64cea09353f4e7bdba32aeb079b6e360554c659a122b1bc6a", + "sha256:48a07b626b55236e0fb4784ee69a465fbf59d79eec1f5b4785c3d3bc57d17aa5" ], "index": "pypi", "markers": "python_full_version >= '3.8.1'", - "version": "==7.0.0" + "version": "==7.1.0" }, "identify": { "hashes": [ - "sha256:10a7ca245cfcd756a554a7288159f72ff105ad233c7c4b9c6f0f4d108f5f6791", - "sha256:c4de0081837b211594f8e877a6b4fad7ca32bbfc1a9307fdd61c28bfe923f13e" + "sha256:cb171c685bdc31bcc4c1734698736a7d5b6c8bf2e0c15117f4d469c8640ae5cf", + "sha256:e79ae4406387a9d300332b5fd366d8994f1525e8414984e1a59e058b2eda2dd0" ], "markers": "python_version >= '3.8'", - "version": "==2.5.35" + "version": "==2.6.0" }, "ipython": { "hashes": [ - "sha256:07232af52a5ba146dc3372c7bf52a0f890a23edf38d77caef8d53f9cdc2584c1", - "sha256:7468edaf4f6de3e1b912e57f66c241e6fd3c7099f2ec2136e239e142e800274d" + "sha256:1cec0fbba8404af13facebe83d04436a7434c7400e59f47acf467c64abd0956c", + "sha256:e6b347c27bdf9c32ee9d31ae85defc525755a1869f14057e900675b9e8d6e6ff" ], "markers": "python_version >= '3.10'", - "version": "==8.23.0" + "version": "==8.26.0" }, "isort": { "hashes": [ @@ -2033,19 +2197,19 @@ }, "nodeenv": { "hashes": [ - "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2", - "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec" + "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", + "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'", - "version": "==1.8.0" + "version": "==1.9.1" }, "packaging": { "hashes": [ - "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5", - "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9" + "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002", + "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124" ], - "markers": "python_version >= '3.7'", - "version": "==24.0" + "markers": "python_version >= '3.8'", + "version": "==24.1" }, "parso": { "hashes": [ @@ -2073,28 +2237,28 @@ }, "platformdirs": { "hashes": [ - "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068", - "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768" + "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee", + "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3" ], "markers": "python_version >= '3.8'", - "version": "==4.2.0" + "version": "==4.2.2" }, "pre-commit": { "hashes": [ - "sha256:5eae9e10c2b5ac51577c3452ec0a490455c45a0533f7960f993a0d01e59decab", - "sha256:e209d61b8acdcf742404408531f0c37d49d2c734fd7cff2d6076083d191cb060" + "sha256:8bb6494d4a20423842e198980c9ecf9f96607a07ea29549e180eef9ae80fe7af", + "sha256:9a90a53bf82fdd8778d58085faf8d83df56e40dfe18f45b19446e26bf1b3a63f" ], "index": "pypi", "markers": "python_version >= '3.9'", - "version": "==3.7.0" + "version": "==3.8.0" }, "prompt-toolkit": { "hashes": [ 
- "sha256:3527b7af26106cbc65a040bcc84839a3566ec1b051bb0bfe953631e704b0ff7d", - "sha256:a11a29cb3bf0a28a387fe5122cdb649816a957cd9261dcedf8c9f1fef33eacf6" + "sha256:0d7bfa67001d5e39d02c224b663abc33687405033a8c422d0d675a5a13361d10", + "sha256:1e1b29cb58080b1e69f207c893a1a7bf16d127a5c30c9d17a25a5d77792e5360" ], "markers": "python_full_version >= '3.7.0'", - "version": "==3.0.43" + "version": "==3.0.47" }, "ptyprocess": { "hashes": [ @@ -2106,18 +2270,18 @@ }, "pure-eval": { "hashes": [ - "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350", - "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3" + "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", + "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42" ], - "version": "==0.2.2" + "version": "==0.2.3" }, "pycodestyle": { "hashes": [ - "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f", - "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67" + "sha256:442f950141b4f43df752dd303511ffded3a04c2b6fb7f65980574f0c31e6e79c", + "sha256:949a39f6b86c3e1515ba1787c2022131d165a8ad271b11370a8819aa070269e4" ], "markers": "python_version >= '3.8'", - "version": "==2.11.1" + "version": "==2.12.0" }, "pyflakes": { "hashes": [ @@ -2129,11 +2293,11 @@ }, "pygments": { "hashes": [ - "sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c", - "sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367" + "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199", + "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a" ], - "markers": "python_version >= '3.7'", - "version": "==2.17.2" + "markers": "python_version >= '3.8'", + "version": "==2.18.0" }, "pyyaml": { "hashes": [ @@ -2192,14 +2356,6 @@ "markers": "python_version >= '3.6'", "version": "==6.0.1" }, - "setuptools": { - "hashes": [ - "sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987", - "sha256:c636ac361bc47580504644275c9ad802c50415c7522212252c033bd15f301f32" - ], - "markers": "python_version >= '3.8'", - "version": "==69.5.1" - }, "six": { "hashes": [ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", @@ -2232,19 +2388,19 @@ }, "typing-extensions": { "hashes": [ - "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0", - "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a" + "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", + "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8" ], "markers": "python_version < '3.12'", - "version": "==4.11.0" + "version": "==4.12.2" }, "virtualenv": { "hashes": [ - "sha256:7bb554bbdfeaacc3349fa614ea5bff6ac300fc7c335e9facf3a3bcfc703f45be", - "sha256:8aac4332f2ea6ef519c648d0bc48a5b1d324994753519919bddbb1aff25a104e" + "sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a", + "sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589" ], "markers": "python_version >= '3.7'", - "version": "==20.25.3" + "version": "==20.26.3" }, "wcwidth": { "hashes": [ diff --git a/README.md b/README.md index b51a60bc..58b7fb69 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,49 @@ See [`CONTRIBUTING.md`][org-contrib]. 
[org-contrib]: https://github.com/creativecommons/.github/blob/main/CONTRIBUTING.md +### Project structure + +Please note that in the directory tree below, all instances of `fetch`, +`process`, and `report` refer to the three phases of data gathering, +processing, and report generation. + +``` +Quantifying/ +├── .github/ +│ ├── workflows/ +│ │ ├── fetch.yml +│ │ ├── process.yml +│ │ ├── report.yml +│ │ └── static_analysis.yml +├── data/ # Data generated by script runs +│ ├── 20XXQX/ +│ │ ├── 1-fetch/ +│ │ ├── 2-process/ +│ │ ├── 3-report/ +│ │ │ └── README.md # All generated reports are displayed in the README +│ └── ... +├── dev/ +├── pre-automation/ # All Quantifying work prior to adding automation system +├── scripts/ # Run scripts for all phases +│ ├── 1-fetch/ +│ ├── 2-process/ +│ ├── 3-report/ +│ └── shared.py +├── .cc-metadata.yml +├── .flake8 # Python tool configuration +├── .gitignore +├── .pre-commit-config.yaml # Static analysis configuration +├── LICENSE +├── Pipfile # Specifies the project's dependencies and Python version +├── Pipfile.lock +├── README.md +├── env.example +├── history.md +├── pyproject.toml # Python tools configuration +└── sources.md +``` + + ## Development diff --git a/data/2024Q2/1-fetch/gcs_fetched.csv b/data/2024Q2/1-fetch/gcs_fetched.csv new file mode 100644 index 00000000..7804d2b5 --- /dev/null +++ b/data/2024Q2/1-fetch/gcs_fetched.csv @@ -0,0 +1,22 @@ +LICENSE TYPE, No Priori, United States, Canada, India, United Kingdom, Australia, Japan, English, Spanish, French, Arabic, Chinese (Simplified), Indonesian +https://creativecommons.org/licenses/nc/2.0,57000000,43700000,39400,11500,157000,31100,29300,54600000,1160000,2720000,2960,68100000,101000 +https://creativecommons.org/licenses/sa/1.0,108000000,91000000,34400,19100,158000,43100,25100,104000000,268000,332000,28500,106000000,223000 +https://creativecommons.org/licenses/nd/1.0,222000000,259000000,76700,63400,519000,165000,79600,215000000,492000,383000,69500,222000000,2480000 +https://creativecommons.org/licenses/by-sa/3.0,108000000,88800000,64700,47600,254000,151000,145000,93500000,5010000,3980000,90500,114000000,112000 +https://creativecommons.org/licenses/sampling/1.0,170000000,151000000,26500,23600,263000,56800,34300,170000000,87500,21500,1630,171000000,209000 +https://creativecommons.org/licenses/nc-sa/1.0,24700000,17600000,12600,3800,75300,11600,13400,22600000,67700,272000,1200,24700000,42400 +https://creativecommons.org/licenses/nd-nc/2.0,56900000,43700000,43500,11100,156000,36500,28300,54200000,237000,549000,2660,67600000,11500 +https://creativecommons.org/licenses/by-nd-nc/1.0,16000000,10400000,13500,2900,64700,9990,21300,15500000,63400,28800,1920,16000000,27600 +https://creativecommons.org/licenses/sa/1.0,108000000,91500000,23000,17200,158000,38200,21700,104000000,270000,337000,28600,108000000,224000 +https://creativecommons.org/licenses/nd-nc/1.0,57200000,45500000,23100,8400,123000,21400,30000,56100000,135000,249000,3340,57200000,10200 +https://creativecommons.org/publicdomain/zero/1.0,32700000,30000000,20200,18200,48600,12100,32200,30900000,131000,93400,16100,32700000,10500 +https://creativecommons.org/licenses/nc-sa/2.0,25000000,17700000,19700,4640,80700,14700,11900,23400000,368000,1340000,1490,25000000,36100 +https://creativecommons.org/licenses/sa/1.0,108000000,91400000,23000,17100,159000,38200,21600,104000000,270000,337000,28600,108000000,224000 
+https://creativecommons.org/licenses/by-sa/1.0,102000000,85900000,21900,15800,150000,36100,21200,97400000,267000,330000,28300,102000000,219000 +https://creativecommons.org/licenses/by-nd/2.5,49500000,35500000,45100,8640,133000,24600,23600,47800000,367000,37900,2200,49500000,11200 +https://creativecommons.org/licenses/by-nd-nc/1.0,15800000,10300000,15600,2940,66800,10200,21400,15300000,62400,29000,1980,15800000,27500 +https://creativecommons.org/licenses/nd/1.0,220000000,194000000,89900,56500,424000,147000,88400,213000000,490000,297000,69500,220000000,2570000 +https://creativecommons.org/licenses/by-nc-sa/4.0,62900000,44700000,49900,32900,163000,32900,79400,58900000,1020000,3610000,11500,62900000,102000 +https://creativecommons.org/licenses/sampling+/1.0,169000000,151000000,27300,22900,274000,57300,34700,168000000,123000,62800,1310,169000000,200000 +https://creativecommons.org/licenses/by-nc-sa/2.5,31600000,22800000,20100,5400,83100,15200,10400,29900000,258000,250000,1570,31600000,7290 +https://creativecommons.org/licenses/nc-sa/1.0,24100000,17500000,10400,3820,58800,11800,10600,23300000,67500,280000,1200,24100000,42400 diff --git a/data/2024Q2/3-report/gcs_country_report.png b/data/2024Q2/3-report/gcs_country_report.png new file mode 100644 index 00000000..fb79663d Binary files /dev/null and b/data/2024Q2/3-report/gcs_country_report.png differ diff --git a/data/2024Q2/3-report/gcs_language_report.png b/data/2024Q2/3-report/gcs_language_report.png new file mode 100644 index 00000000..eec045af Binary files /dev/null and b/data/2024Q2/3-report/gcs_language_report.png differ diff --git a/data/2024Q2/3-report/gcs_licensetype_report.png b/data/2024Q2/3-report/gcs_licensetype_report.png new file mode 100644 index 00000000..e6fc0557 Binary files /dev/null and b/data/2024Q2/3-report/gcs_licensetype_report.png differ diff --git a/data/2024Q2/README.md b/data/2024Q2/README.md new file mode 100644 index 00000000..19cea41e --- /dev/null +++ b/data/2024Q2/README.md @@ -0,0 +1,20 @@ +# 2024Q2 Quantifying the Commons + +## Data Source: Google Custom Search + + +### Country Report +![Number of Google Webpages Licensed by Country](3-report/gcs_country_report.png) +Number of Google Webpages Licensed by Country + + +### License Type Report +![Number of Webpages Licensed by License Type](3-report/gcs_licensetype_report.png) +Number of Webpages Licensed by License Type + + +### Language Report +![Number of Google Webpages Licensed by Language](3-report/gcs_language_report.png) +Number of Google Webpages Licensed by Language + + diff --git a/data/2024Q2/state.yaml b/data/2024Q2/state.yaml new file mode 100644 index 00000000..05a6a00d --- /dev/null +++ b/data/2024Q2/state.yaml @@ -0,0 +1 @@ +total_records_retrieved: 273 diff --git a/data/2024Q3/1-fetch/flickr_fetched/final.csv b/data/2024Q3/1-fetch/flickr_fetched/final.csv new file mode 100644 index 00000000..e69de29b diff --git a/data/2024Q3/1-fetch/flickr_fetched/hs.csv b/data/2024Q3/1-fetch/flickr_fetched/hs.csv new file mode 100644 index 00000000..e69de29b diff --git a/data/2024Q3/1-fetch/flickr_fetched/rec.txt b/data/2024Q3/1-fetch/flickr_fetched/rec.txt new file mode 100644 index 00000000..cc375900 --- /dev/null +++ b/data/2024Q3/1-fetch/flickr_fetched/rec.txt @@ -0,0 +1 @@ +1 1 1 diff --git a/data/2024Q3/1-fetch/gcs_fetched.csv b/data/2024Q3/1-fetch/gcs_fetched.csv new file mode 100644 index 00000000..df660fc7 --- /dev/null +++ b/data/2024Q3/1-fetch/gcs_fetched.csv @@ -0,0 +1,18 @@ +LICENSE TYPE, No Priori, United States, Canada, India, United Kingdom, 
Australia, Japan, English, Spanish, French, Arabic, Chinese (Simplified), Indonesian +https://creativecommons.org/licenses/by/2.5,235000000,208000000,112000,82400,406000,701000,76800,225000000,3940000,835000,88300,235000000,115000 +https://creativecommons.org/licenses/by/4.0,412000000,334000000,702000,360000,7250000,770000,675000,381000000,16100000,5260000,255000,412000000,11900000 +https://creativecommons.org/licenses/by-nc-sa/2.5,31400000,22700000,21200,5400,83000,15000,10300,29800000,254000,248000,1580,31400000,7480 +https://creativecommons.org/licenses/nc/1.0,54600000,43400000,25100,8390,128000,22200,30500,53600000,136000,479000,3760,54500000,81500 +https://creativecommons.org/licenses/by-nc-sa/1.0,24000000,17400000,11300,3810,59100,11900,10600,23300000,66900,276000,1220,24000000,42900 +https://creativecommons.org/licenses/by/2.1,219000000,189000000,132000,56200,477000,139000,269000,211000000,494000,232000,19800,219000000,119000 +https://creativecommons.org/licenses/nc-sampling+/1.0,41200000,32000000,12900,4590,82300,14600,18800,41000000,52800,37800,381,41200000,31100 +https://creativecommons.org/licenses/by-sa/4.0,175000000,140000000,126000,79300,341000,103000,123000,158000000,6800000,3860000,81300,175000000,626000 +https://creativecommons.org/licenses/by/2.1,217000000,189000000,120000,55500,456000,134000,278000,209000000,219000,241000,20300,217000000,116000 +https://creativecommons.org/licenses/by-nc-nd/2.1,66000000,51100000,21000,9200,144000,25100,30400,64300000,167000,581000,3690,66100000,19300 +https://creativecommons.org/licenses/by-nc/3.0,88200000,77800000,32000,14000,289000,91900,43000,85000000,7630000,6890000,13700,88300000,40200 +https://creativecommons.org/licenses/by/2.1,269000000,235000000,122000,54500,477000,124000,277000,261000000,455000,1900000,20100,269000000,113000 +https://creativecommons.org/licenses/by-nd/2.0,32500000,23600000,17000,6910,57500,19500,20100,31200000,121000,47200,5290,32500000,7660 +https://creativecommons.org/licenses/by-nc-sa/3.0,35800000,29800000,18000,5760,149000,69400,12200,33700000,623000,4440000,7890,35800000,16000 +https://creativecommons.org/licenses/sa/1.0,120000000,101000000,23500,16600,167000,56500,24400,116000000,247000,314000,29100,120000000,213000 +https://creativecommons.org/licenses/sa/1.0,120000000,101000000,23500,16600,167000,56500,24400,116000000,247000,314000,29100,120000000,213000 +https://creativecommons.org/licenses/by-nc-sa/2.0,20400000,13800000,21400,3730,65000,21700,9270,19100000,92600,773000,4470,20400000,5740 diff --git a/data/2024Q3/3-report/gcs_country_report.png b/data/2024Q3/3-report/gcs_country_report.png new file mode 100644 index 00000000..69e054c8 Binary files /dev/null and b/data/2024Q3/3-report/gcs_country_report.png differ diff --git a/data/2024Q3/3-report/gcs_language_report.png b/data/2024Q3/3-report/gcs_language_report.png new file mode 100644 index 00000000..65878a4e Binary files /dev/null and b/data/2024Q3/3-report/gcs_language_report.png differ diff --git a/data/2024Q3/3-report/gcs_licensetype_report.png b/data/2024Q3/3-report/gcs_licensetype_report.png new file mode 100644 index 00000000..3c2e37cb Binary files /dev/null and b/data/2024Q3/3-report/gcs_licensetype_report.png differ diff --git a/data/2024Q3/README.md b/data/2024Q3/README.md new file mode 100644 index 00000000..f1e525a9 --- /dev/null +++ b/data/2024Q3/README.md @@ -0,0 +1,19 @@ +# 2024Q3 Quantifying the Commons +## Data Source: Google Custom Search + + +### Country Report +![Number of Google Webpages Licensed by 
Country](3-report/gcs_country_report.png) +Number of Google Webpages Licensed by Country + + +### License Type Report +![Number of Webpages Licensed by License Type](3-report/gcs_licensetype_report.png) +Number of Webpages Licensed by License Type + + +### Language Report +![Number of Google Webpages Licensed by Language](3-report/gcs_language_report.png) +Number of Google Webpages Licensed by Language + + diff --git a/data/2024Q3/state.yaml b/data/2024Q3/state.yaml new file mode 100644 index 00000000..2447da60 --- /dev/null +++ b/data/2024Q3/state.yaml @@ -0,0 +1 @@ +total_records_retrieved: 390 diff --git a/data/google_countries.tsv b/data/google_countries.tsv new file mode 100644 index 00000000..df0ee732 --- /dev/null +++ b/data/google_countries.tsv @@ -0,0 +1,243 @@ +Country Country Collection Name +Afghanistan countryAF +Albania countryAL +Algeria countryDZ +American Samoa countryAS +Andorra countryAD +Angola countryAO +Anguilla countryAI +Antarctica countryAQ +Antigua and Barbuda countryAG +Argentina countryAR +Armenia countryAM +Aruba countryAW +Australia countryAU +Austria countryAT +Azerbaijan countryAZ +Bahamas countryBS +Bahrain countryBH +Bangladesh countryBD +Barbados countryBB +Belarus countryBY +Belgium countryBE +Belize countryBZ +Benin countryBJ +Bermuda countryBM +Bhutan countryBT +Bolivia countryBO +Bosnia and Herzegovina countryBA +Botswana countryBW +Bouvet Island countryBV +Brazil countryBR +British Indian Ocean Territory countryIO +Brunei Darussalam countryBN +Bulgaria countryBG +Burkina Faso countryBF +Burundi countryBI +Cambodia countryKH +Cameroon countryCM +Canada countryCA +Cape Verde countryCV +Cayman Islands countryKY +Central African Republic countryCF +Chad countryTD +Chile countryCL +China countryCN +Christmas Island countryCX +Cocos (Keeling) Islands countryCC +Colombia countryCO +Comoros countryKM +Congo countryCG +Congo, the Democratic Republic of the countryCD +Cook Islands countryCK +Costa Rica countryCR +Cote D'ivoire countryCI +Croatia (Hrvatska) countryHR +Cuba countryCU +Cyprus countryCY +Czech Republic countryCZ +Denmark countryDK +Djibouti countryDJ +Dominica countryDM +Dominican Republic countryDO +East Timor countryTP +Ecuador countryEC +Egypt countryEG +El Salvador countrySV +Equatorial Guinea countryGQ +Eritrea countryER +Estonia countryEE +Ethiopia countryET +European Union countryEU +Falkland Islands (Malvinas) countryFK +Faroe Islands countryFO +Fiji countryFJ +Finland countryFI +France countryFR +France, Metropolitan countryFX +French Guiana countryGF +French Polynesia countryPF +French Southern Territories countryTF +Gabon countryGA +Gambia countryGM +Georgia countryGE +Germany countryDE +Ghana countryGH +Gibraltar countryGI +Greece countryGR +Greenland countryGL +Grenada countryGD +Guadeloupe countryGP +Guam countryGU +Guatemala countryGT +Guinea countryGN +Guinea-Bissau countryGW +Guyana countryGY +Haiti countryHT +Heard Island and Mcdonald Islands countryHM +Holy See (Vatican City State) countryVA +Honduras countryHN +Hong Kong countryHK +Hungary countryHU +Iceland countryIS +India countryIN +Indonesia countryID +Iran, Islamic Republic of countryIR +Iraq countryIQ +Ireland countryIE +Israel countryIL +Italy countryIT +Jamaica countryJM +Japan countryJP +Jordan countryJO +Kazakhstan countryKZ +Kenya countryKE +Kiribati countryKI +Korea, Democratic People's Republic of countryKP +Korea, Republic of countryKR +Kuwait countryKW +Kyrgyzstan countryKG +Lao People's Democratic Republic countryLA +Latvia countryLV +Lebanon countryLB +Lesotho 
countryLS +Liberia	countryLR +Libyan Arab Jamahiriya	countryLY +Liechtenstein	countryLI +Lithuania	countryLT +Luxembourg	countryLU +Macao	countryMO +Macedonia, the Former Yugoslav Republic of	countryMK +Madagascar	countryMG +Malawi	countryMW +Malaysia	countryMY +Maldives	countryMV +Mali	countryML +Malta	countryMT +Marshall Islands	countryMH +Martinique	countryMQ +Mauritania	countryMR +Mauritius	countryMU +Mayotte	countryYT +Mexico	countryMX +Micronesia, Federated States of	countryFM +Moldova, Republic of	countryMD +Monaco	countryMC +Mongolia	countryMN +Montserrat	countryMS +Morocco	countryMA +Mozambique	countryMZ +Myanmar	countryMM +Namibia	countryNA +Nauru	countryNR +Nepal	countryNP +Netherlands	countryNL +Netherlands Antilles	countryAN +New Caledonia	countryNC +New Zealand	countryNZ +Nicaragua	countryNI +Niger	countryNE +Nigeria	countryNG +Niue	countryNU +Norfolk Island	countryNF +Northern Mariana Islands	countryMP +Norway	countryNO +Oman	countryOM +Pakistan	countryPK +Palau	countryPW +Palestinian Territory	countryPS +Panama	countryPA +Papua New Guinea	countryPG +Paraguay	countryPY +Peru	countryPE +Philippines	countryPH +Pitcairn	countryPN +Poland	countryPL +Portugal	countryPT +Puerto Rico	countryPR +Qatar	countryQA +Reunion	countryRE +Romania	countryRO +Russian Federation	countryRU +Rwanda	countryRW +Saint Helena	countrySH +Saint Kitts and Nevis	countryKN +Saint Lucia	countryLC +Saint Pierre and Miquelon	countryPM +Saint Vincent and the Grenadines	countryVC +Samoa	countryWS +San Marino	countrySM +Sao Tome and Principe	countryST +Saudi Arabia	countrySA +Senegal	countrySN +Serbia and Montenegro	countryCS +Seychelles	countrySC +Sierra Leone	countrySL +Singapore	countrySG +Slovakia	countrySK +Slovenia	countrySI +Solomon Islands	countrySB +Somalia	countrySO +South Africa	countryZA +South Georgia and the South Sandwich Islands	countryGS +Spain	countryES +Sri Lanka	countryLK +Sudan	countrySD +Suriname	countrySR +Svalbard and Jan Mayen	countrySJ +Swaziland	countrySZ +Sweden	countrySE +Switzerland	countryCH +Syrian Arab Republic	countrySY +Taiwan, Province of China	countryTW +Tajikistan	countryTJ +Tanzania, United Republic of	countryTZ +Thailand	countryTH +Togo	countryTG +Tokelau	countryTK +Tonga	countryTO +Trinidad and Tobago	countryTT +Tunisia	countryTN +Turkey	countryTR +Turkmenistan	countryTM +Turks and Caicos Islands	countryTC +Tuvalu	countryTV +Uganda	countryUG +Ukraine	countryUA +United Arab Emirates	countryAE +United Kingdom	countryUK +United States	countryUS +United States Minor Outlying Islands	countryUM +Uruguay	countryUY +Uzbekistan	countryUZ +Vanuatu	countryVU +Venezuela	countryVE +Vietnam	countryVN +Virgin Islands, British	countryVG +Virgin Islands, U.S.
countryVI +Wallis and Futuna countryWF +Western Sahara countryEH +Yemen countryYE +Yugoslavia countryYU +Zambia countryZM +Zimbabwe countryZW diff --git a/data/google_lang.txt b/data/google_lang.txt new file mode 100644 index 00000000..0e1df5a4 --- /dev/null +++ b/data/google_lang.txt @@ -0,0 +1,69 @@ +"lang_ar": Arabic + +"lang_bg": Bulgarian + +"lang_ca": Catalan + +"lang_cs": Czech + +"lang_da": Danish + +"lang_de": German + +"lang_el": Greek + +"lang_en": English + +"lang_es": Spanish + +"lang_et": Estonian + +"lang_fi": Finnish + +"lang_fr": French + +"lang_hr": Croatian + +"lang_hu": Hungarian + +"lang_id": Indonesian + +"lang_is": Icelandic + +"lang_it": Italian + +"lang_iw": Hebrew + +"lang_ja": Japanese + +"lang_ko": Korean + +"lang_lt": Lithuanian + +"lang_lv": Latvian + +"lang_nl": Dutch + +"lang_no": Norwegian + +"lang_pl": Polish + +"lang_pt": Portuguese + +"lang_ro": Romanian + +"lang_ru": Russian + +"lang_sk": Slovak + +"lang_sl": Slovenian + +"lang_sr": Serbian + +"lang_sv": Swedish + +"lang_tr": Turkish + +"lang_zh-CN": Chinese (Simplified) + +"lang_zh-TW": Chinese (Traditional) diff --git a/data/legal-tool-paths.txt b/data/legal-tool-paths.txt new file mode 100644 index 00000000..d942cd98 --- /dev/null +++ b/data/legal-tool-paths.txt @@ -0,0 +1,652 @@ +licenses/publicdomain +licenses/by/1.0 +licenses/by/1.0/fi +licenses/by/1.0/il +licenses/by/1.0/nl +licenses/by-nc/1.0 +licenses/by-nc/1.0/fi +licenses/by-nc/1.0/il +licenses/by-nc/1.0/nl +licenses/by-nc-sa/1.0 +licenses/by-nc-sa/1.0/fi +licenses/by-nc-sa/1.0/il +licenses/by-nc-sa/1.0/nl +licenses/by-nd/1.0 +licenses/by-nd/1.0/fi +licenses/by-nd/1.0/il +licenses/by-nd/1.0/nl +licenses/by-nd-nc/1.0 +licenses/by-nd-nc/1.0/fi +licenses/by-nd-nc/1.0/il +licenses/by-nd-nc/1.0/nl +licenses/by-sa/1.0 +licenses/by-sa/1.0/fi +licenses/by-sa/1.0/il +licenses/by-sa/1.0/nl +licenses/nc/1.0 +licenses/nc/1.0/fi +licenses/nc/1.0/nl +licenses/nc-sa/1.0 +licenses/nc-sa/1.0/fi +licenses/nc-sa/1.0/nl +licenses/nc-sampling+/1.0 +licenses/nc-sampling+/1.0/tw +licenses/nd/1.0 +licenses/nd/1.0/fi +licenses/nd/1.0/nl +licenses/nd-nc/1.0 +licenses/nd-nc/1.0/fi +licenses/nd-nc/1.0/nl +licenses/sa/1.0 +licenses/sa/1.0/fi +licenses/sa/1.0/nl +licenses/sampling/1.0 +licenses/sampling/1.0/br +licenses/sampling/1.0/tw +licenses/sampling+/1.0 +licenses/sampling+/1.0/br +licenses/sampling+/1.0/de +licenses/sampling+/1.0/tw +licenses/by/2.0 +licenses/by/2.0/at +licenses/by/2.0/au +licenses/by/2.0/be +licenses/by/2.0/br +licenses/by/2.0/ca +licenses/by/2.0/cl +licenses/by/2.0/de +licenses/by/2.0/es +licenses/by/2.0/fr +licenses/by/2.0/hr +licenses/by/2.0/it +licenses/by/2.0/jp +licenses/by/2.0/kr +licenses/by/2.0/nl +licenses/by/2.0/pl +licenses/by/2.0/tw +licenses/by/2.0/uk +licenses/by/2.0/za +licenses/by-nc/2.0 +licenses/by-nc/2.0/at +licenses/by-nc/2.0/au +licenses/by-nc/2.0/be +licenses/by-nc/2.0/br +licenses/by-nc/2.0/ca +licenses/by-nc/2.0/cl +licenses/by-nc/2.0/de +licenses/by-nc/2.0/es +licenses/by-nc/2.0/fr +licenses/by-nc/2.0/hr +licenses/by-nc/2.0/it +licenses/by-nc/2.0/jp +licenses/by-nc/2.0/kr +licenses/by-nc/2.0/nl +licenses/by-nc/2.0/pl +licenses/by-nc/2.0/tw +licenses/by-nc/2.0/uk +licenses/by-nc/2.0/za +licenses/by-nc-nd/2.0 +licenses/by-nc-nd/2.0/at +licenses/by-nc-nd/2.0/au +licenses/by-nc-nd/2.0/be +licenses/by-nc-nd/2.0/br +licenses/by-nc-nd/2.0/ca +licenses/by-nc-nd/2.0/cl +licenses/by-nc-nd/2.0/de +licenses/by-nc-nd/2.0/es +licenses/by-nc-nd/2.0/fr +licenses/by-nc-nd/2.0/hr +licenses/by-nc-nd/2.0/it +licenses/by-nc-nd/2.0/jp 
+licenses/by-nc-nd/2.0/kr +licenses/by-nc-nd/2.0/nl +licenses/by-nc-nd/2.0/pl +licenses/by-nc-nd/2.0/tw +licenses/by-nc-nd/2.0/uk +licenses/by-nc-nd/2.0/za +licenses/by-nc-sa/2.0 +licenses/by-nc-sa/2.0/at +licenses/by-nc-sa/2.0/au +licenses/by-nc-sa/2.0/be +licenses/by-nc-sa/2.0/br +licenses/by-nc-sa/2.0/ca +licenses/by-nc-sa/2.0/cl +licenses/by-nc-sa/2.0/de +licenses/by-nc-sa/2.0/es +licenses/by-nc-sa/2.0/fr +licenses/by-nc-sa/2.0/hr +licenses/by-nc-sa/2.0/it +licenses/by-nc-sa/2.0/jp +licenses/by-nc-sa/2.0/kr +licenses/by-nc-sa/2.0/nl +licenses/by-nc-sa/2.0/pl +licenses/by-nc-sa/2.0/tw +licenses/by-nc-sa/2.0/uk +licenses/by-nc-sa/2.0/za +licenses/by-nd/2.0 +licenses/by-nd/2.0/at +licenses/by-nd/2.0/au +licenses/by-nd/2.0/be +licenses/by-nd/2.0/br +licenses/by-nd/2.0/ca +licenses/by-nd/2.0/cl +licenses/by-nd/2.0/de +licenses/by-nd/2.0/es +licenses/by-nd/2.0/fr +licenses/by-nd/2.0/hr +licenses/by-nd/2.0/it +licenses/by-nd/2.0/jp +licenses/by-nd/2.0/kr +licenses/by-nd/2.0/nl +licenses/by-nd/2.0/pl +licenses/by-nd/2.0/tw +licenses/by-nd/2.0/uk +licenses/by-nd/2.0/za +licenses/by-nd-nc/2.0 +licenses/by-nd-nc/2.0/jp +licenses/by-sa/2.0 +licenses/by-sa/2.0/at +licenses/by-sa/2.0/au +licenses/by-sa/2.0/be +licenses/by-sa/2.0/br +licenses/by-sa/2.0/ca +licenses/by-sa/2.0/cl +licenses/by-sa/2.0/de +licenses/by-sa/2.0/es +licenses/by-sa/2.0/fr +licenses/by-sa/2.0/hr +licenses/by-sa/2.0/it +licenses/by-sa/2.0/jp +licenses/by-sa/2.0/kr +licenses/by-sa/2.0/nl +licenses/by-sa/2.0/pl +licenses/by-sa/2.0/tw +licenses/by-sa/2.0/uk +licenses/by-sa/2.0/za +licenses/devnations/2.0 +licenses/nc/2.0 +licenses/nc/2.0/jp +licenses/nc-sa/2.0 +licenses/nc-sa/2.0/jp +licenses/nd/2.0 +licenses/nd/2.0/jp +licenses/nd-nc/2.0 +licenses/nd-nc/2.0/jp +licenses/sa/2.0 +licenses/sa/2.0/jp +licenses/by/2.1 +licenses/by/2.1/au +licenses/by/2.1/ca +licenses/by/2.1/es +licenses/by/2.1/jp +licenses/by-nc/2.1 +licenses/by-nc/2.1/au +licenses/by-nc/2.1/ca +licenses/by-nc/2.1/es +licenses/by-nc/2.1/jp +licenses/by-nc-nd/2.1 +licenses/by-nc-nd/2.1/au +licenses/by-nc-nd/2.1/ca +licenses/by-nc-nd/2.1/es +licenses/by-nc-nd/2.1/jp +licenses/by-nc-sa/2.1 +licenses/by-nc-sa/2.1/au +licenses/by-nc-sa/2.1/ca +licenses/by-nc-sa/2.1/es +licenses/by-nc-sa/2.1/jp +licenses/by-nd/2.1 +licenses/by-nd/2.1/au +licenses/by-nd/2.1/ca +licenses/by-nd/2.1/es +licenses/by-nd/2.1/jp +licenses/by-sa/2.1 +licenses/by-sa/2.1/au +licenses/by-sa/2.1/ca +licenses/by-sa/2.1/es +licenses/by-sa/2.1/jp +licenses/by/2.5 +licenses/by/2.5/ar +licenses/by/2.5/au +licenses/by/2.5/bg +licenses/by/2.5/br +licenses/by/2.5/ca +licenses/by/2.5/ch +licenses/by/2.5/cn +licenses/by/2.5/co +licenses/by/2.5/dk +licenses/by/2.5/es +licenses/by/2.5/hr +licenses/by/2.5/hu +licenses/by/2.5/il +licenses/by/2.5/in +licenses/by/2.5/it +licenses/by/2.5/mk +licenses/by/2.5/mt +licenses/by/2.5/mx +licenses/by/2.5/my +licenses/by/2.5/nl +licenses/by/2.5/pe +licenses/by/2.5/pl +licenses/by/2.5/pt +licenses/by/2.5/scotland +licenses/by/2.5/se +licenses/by/2.5/si +licenses/by/2.5/tw +licenses/by/2.5/za +licenses/by-nc/2.5 +licenses/by-nc/2.5/ar +licenses/by-nc/2.5/au +licenses/by-nc/2.5/bg +licenses/by-nc/2.5/br +licenses/by-nc/2.5/ca +licenses/by-nc/2.5/ch +licenses/by-nc/2.5/cn +licenses/by-nc/2.5/co +licenses/by-nc/2.5/dk +licenses/by-nc/2.5/es +licenses/by-nc/2.5/hr +licenses/by-nc/2.5/hu +licenses/by-nc/2.5/il +licenses/by-nc/2.5/in +licenses/by-nc/2.5/it +licenses/by-nc/2.5/mk +licenses/by-nc/2.5/mt +licenses/by-nc/2.5/mx +licenses/by-nc/2.5/my +licenses/by-nc/2.5/nl 
+licenses/by-nc/2.5/pe +licenses/by-nc/2.5/pl +licenses/by-nc/2.5/pt +licenses/by-nc/2.5/scotland +licenses/by-nc/2.5/se +licenses/by-nc/2.5/si +licenses/by-nc/2.5/tw +licenses/by-nc/2.5/za +licenses/by-nc-nd/2.5 +licenses/by-nc-nd/2.5/ar +licenses/by-nc-nd/2.5/au +licenses/by-nc-nd/2.5/bg +licenses/by-nc-nd/2.5/br +licenses/by-nc-nd/2.5/ca +licenses/by-nc-nd/2.5/ch +licenses/by-nc-nd/2.5/cn +licenses/by-nc-nd/2.5/co +licenses/by-nc-nd/2.5/dk +licenses/by-nc-nd/2.5/es +licenses/by-nc-nd/2.5/hr +licenses/by-nc-nd/2.5/hu +licenses/by-nc-nd/2.5/il +licenses/by-nc-nd/2.5/in +licenses/by-nc-nd/2.5/it +licenses/by-nc-nd/2.5/mk +licenses/by-nc-nd/2.5/mt +licenses/by-nc-nd/2.5/mx +licenses/by-nc-nd/2.5/my +licenses/by-nc-nd/2.5/nl +licenses/by-nc-nd/2.5/pe +licenses/by-nc-nd/2.5/pl +licenses/by-nc-nd/2.5/pt +licenses/by-nc-nd/2.5/scotland +licenses/by-nc-nd/2.5/se +licenses/by-nc-nd/2.5/si +licenses/by-nc-nd/2.5/tw +licenses/by-nc-nd/2.5/za +licenses/by-nc-sa/2.5 +licenses/by-nc-sa/2.5/ar +licenses/by-nc-sa/2.5/au +licenses/by-nc-sa/2.5/bg +licenses/by-nc-sa/2.5/br +licenses/by-nc-sa/2.5/ca +licenses/by-nc-sa/2.5/ch +licenses/by-nc-sa/2.5/cn +licenses/by-nc-sa/2.5/co +licenses/by-nc-sa/2.5/dk +licenses/by-nc-sa/2.5/es +licenses/by-nc-sa/2.5/hr +licenses/by-nc-sa/2.5/hu +licenses/by-nc-sa/2.5/il +licenses/by-nc-sa/2.5/in +licenses/by-nc-sa/2.5/it +licenses/by-nc-sa/2.5/mk +licenses/by-nc-sa/2.5/mt +licenses/by-nc-sa/2.5/mx +licenses/by-nc-sa/2.5/my +licenses/by-nc-sa/2.5/nl +licenses/by-nc-sa/2.5/pe +licenses/by-nc-sa/2.5/pl +licenses/by-nc-sa/2.5/pt +licenses/by-nc-sa/2.5/scotland +licenses/by-nc-sa/2.5/se +licenses/by-nc-sa/2.5/si +licenses/by-nc-sa/2.5/tw +licenses/by-nc-sa/2.5/za +licenses/by-nd/2.5 +licenses/by-nd/2.5/ar +licenses/by-nd/2.5/au +licenses/by-nd/2.5/bg +licenses/by-nd/2.5/br +licenses/by-nd/2.5/ca +licenses/by-nd/2.5/ch +licenses/by-nd/2.5/cn +licenses/by-nd/2.5/co +licenses/by-nd/2.5/dk +licenses/by-nd/2.5/es +licenses/by-nd/2.5/hr +licenses/by-nd/2.5/hu +licenses/by-nd/2.5/il +licenses/by-nd/2.5/in +licenses/by-nd/2.5/it +licenses/by-nd/2.5/mk +licenses/by-nd/2.5/mt +licenses/by-nd/2.5/mx +licenses/by-nd/2.5/my +licenses/by-nd/2.5/nl +licenses/by-nd/2.5/pe +licenses/by-nd/2.5/pl +licenses/by-nd/2.5/pt +licenses/by-nd/2.5/scotland +licenses/by-nd/2.5/se +licenses/by-nd/2.5/si +licenses/by-nd/2.5/tw +licenses/by-nd/2.5/za +licenses/by-sa/2.5 +licenses/by-sa/2.5/ar +licenses/by-sa/2.5/au +licenses/by-sa/2.5/bg +licenses/by-sa/2.5/br +licenses/by-sa/2.5/ca +licenses/by-sa/2.5/ch +licenses/by-sa/2.5/cn +licenses/by-sa/2.5/co +licenses/by-sa/2.5/dk +licenses/by-sa/2.5/es +licenses/by-sa/2.5/hr +licenses/by-sa/2.5/hu +licenses/by-sa/2.5/il +licenses/by-sa/2.5/in +licenses/by-sa/2.5/it +licenses/by-sa/2.5/mk +licenses/by-sa/2.5/mt +licenses/by-sa/2.5/mx +licenses/by-sa/2.5/my +licenses/by-sa/2.5/nl +licenses/by-sa/2.5/pe +licenses/by-sa/2.5/pl +licenses/by-sa/2.5/pt +licenses/by-sa/2.5/scotland +licenses/by-sa/2.5/se +licenses/by-sa/2.5/si +licenses/by-sa/2.5/tw +licenses/by-sa/2.5/za +licenses/by/3.0 +licenses/by/3.0/am +licenses/by/3.0/at +licenses/by/3.0/au +licenses/by/3.0/az +licenses/by/3.0/br +licenses/by/3.0/ca +licenses/by/3.0/ch +licenses/by/3.0/cl +licenses/by/3.0/cn +licenses/by/3.0/cr +licenses/by/3.0/cz +licenses/by/3.0/de +licenses/by/3.0/ec +licenses/by/3.0/ee +licenses/by/3.0/eg +licenses/by/3.0/es +licenses/by/3.0/fr +licenses/by/3.0/ge +licenses/by/3.0/gr +licenses/by/3.0/gt +licenses/by/3.0/hk +licenses/by/3.0/hr +licenses/by/3.0/ie +licenses/by/3.0/igo 
+licenses/by/3.0/it +licenses/by/3.0/lu +licenses/by/3.0/nl +licenses/by/3.0/no +licenses/by/3.0/nz +licenses/by/3.0/ph +licenses/by/3.0/pl +licenses/by/3.0/pr +licenses/by/3.0/pt +licenses/by/3.0/ro +licenses/by/3.0/rs +licenses/by/3.0/sg +licenses/by/3.0/th +licenses/by/3.0/tw +licenses/by/3.0/ug +licenses/by/3.0/us +licenses/by/3.0/ve +licenses/by/3.0/vn +licenses/by/3.0/za +licenses/by-nc/3.0 +licenses/by-nc/3.0/am +licenses/by-nc/3.0/at +licenses/by-nc/3.0/au +licenses/by-nc/3.0/az +licenses/by-nc/3.0/br +licenses/by-nc/3.0/ca +licenses/by-nc/3.0/ch +licenses/by-nc/3.0/cl +licenses/by-nc/3.0/cn +licenses/by-nc/3.0/cr +licenses/by-nc/3.0/cz +licenses/by-nc/3.0/de +licenses/by-nc/3.0/ec +licenses/by-nc/3.0/ee +licenses/by-nc/3.0/eg +licenses/by-nc/3.0/es +licenses/by-nc/3.0/fr +licenses/by-nc/3.0/ge +licenses/by-nc/3.0/gr +licenses/by-nc/3.0/gt +licenses/by-nc/3.0/hk +licenses/by-nc/3.0/hr +licenses/by-nc/3.0/ie +licenses/by-nc/3.0/igo +licenses/by-nc/3.0/it +licenses/by-nc/3.0/lu +licenses/by-nc/3.0/nl +licenses/by-nc/3.0/no +licenses/by-nc/3.0/nz +licenses/by-nc/3.0/ph +licenses/by-nc/3.0/pl +licenses/by-nc/3.0/pr +licenses/by-nc/3.0/pt +licenses/by-nc/3.0/ro +licenses/by-nc/3.0/rs +licenses/by-nc/3.0/sg +licenses/by-nc/3.0/th +licenses/by-nc/3.0/tw +licenses/by-nc/3.0/ug +licenses/by-nc/3.0/us +licenses/by-nc/3.0/ve +licenses/by-nc/3.0/vn +licenses/by-nc/3.0/za +licenses/by-nc-nd/3.0 +licenses/by-nc-nd/3.0/am +licenses/by-nc-nd/3.0/at +licenses/by-nc-nd/3.0/au +licenses/by-nc-nd/3.0/az +licenses/by-nc-nd/3.0/br +licenses/by-nc-nd/3.0/ca +licenses/by-nc-nd/3.0/ch +licenses/by-nc-nd/3.0/cl +licenses/by-nc-nd/3.0/cn +licenses/by-nc-nd/3.0/cr +licenses/by-nc-nd/3.0/cz +licenses/by-nc-nd/3.0/de +licenses/by-nc-nd/3.0/ec +licenses/by-nc-nd/3.0/ee +licenses/by-nc-nd/3.0/eg +licenses/by-nc-nd/3.0/es +licenses/by-nc-nd/3.0/fr +licenses/by-nc-nd/3.0/ge +licenses/by-nc-nd/3.0/gr +licenses/by-nc-nd/3.0/gt +licenses/by-nc-nd/3.0/hk +licenses/by-nc-nd/3.0/hr +licenses/by-nc-nd/3.0/ie +licenses/by-nc-nd/3.0/igo +licenses/by-nc-nd/3.0/it +licenses/by-nc-nd/3.0/lu +licenses/by-nc-nd/3.0/nl +licenses/by-nc-nd/3.0/no +licenses/by-nc-nd/3.0/nz +licenses/by-nc-nd/3.0/ph +licenses/by-nc-nd/3.0/pl +licenses/by-nc-nd/3.0/pr +licenses/by-nc-nd/3.0/pt +licenses/by-nc-nd/3.0/ro +licenses/by-nc-nd/3.0/rs +licenses/by-nc-nd/3.0/sg +licenses/by-nc-nd/3.0/th +licenses/by-nc-nd/3.0/tw +licenses/by-nc-nd/3.0/ug +licenses/by-nc-nd/3.0/us +licenses/by-nc-nd/3.0/ve +licenses/by-nc-nd/3.0/vn +licenses/by-nc-nd/3.0/za +licenses/by-nc-sa/3.0 +licenses/by-nc-sa/3.0/am +licenses/by-nc-sa/3.0/at +licenses/by-nc-sa/3.0/au +licenses/by-nc-sa/3.0/az +licenses/by-nc-sa/3.0/br +licenses/by-nc-sa/3.0/ca +licenses/by-nc-sa/3.0/ch +licenses/by-nc-sa/3.0/cl +licenses/by-nc-sa/3.0/cn +licenses/by-nc-sa/3.0/cr +licenses/by-nc-sa/3.0/cz +licenses/by-nc-sa/3.0/de +licenses/by-nc-sa/3.0/ec +licenses/by-nc-sa/3.0/ee +licenses/by-nc-sa/3.0/eg +licenses/by-nc-sa/3.0/es +licenses/by-nc-sa/3.0/fr +licenses/by-nc-sa/3.0/ge +licenses/by-nc-sa/3.0/gr +licenses/by-nc-sa/3.0/gt +licenses/by-nc-sa/3.0/hk +licenses/by-nc-sa/3.0/hr +licenses/by-nc-sa/3.0/ie +licenses/by-nc-sa/3.0/igo +licenses/by-nc-sa/3.0/it +licenses/by-nc-sa/3.0/lu +licenses/by-nc-sa/3.0/nl +licenses/by-nc-sa/3.0/no +licenses/by-nc-sa/3.0/nz +licenses/by-nc-sa/3.0/ph +licenses/by-nc-sa/3.0/pl +licenses/by-nc-sa/3.0/pr +licenses/by-nc-sa/3.0/pt +licenses/by-nc-sa/3.0/ro +licenses/by-nc-sa/3.0/rs +licenses/by-nc-sa/3.0/sg +licenses/by-nc-sa/3.0/th +licenses/by-nc-sa/3.0/tw 
+licenses/by-nc-sa/3.0/ug +licenses/by-nc-sa/3.0/us +licenses/by-nc-sa/3.0/ve +licenses/by-nc-sa/3.0/vn +licenses/by-nc-sa/3.0/za +licenses/by-nd/3.0 +licenses/by-nd/3.0/am +licenses/by-nd/3.0/at +licenses/by-nd/3.0/au +licenses/by-nd/3.0/az +licenses/by-nd/3.0/br +licenses/by-nd/3.0/ca +licenses/by-nd/3.0/ch +licenses/by-nd/3.0/cl +licenses/by-nd/3.0/cn +licenses/by-nd/3.0/cr +licenses/by-nd/3.0/cz +licenses/by-nd/3.0/de +licenses/by-nd/3.0/ec +licenses/by-nd/3.0/ee +licenses/by-nd/3.0/eg +licenses/by-nd/3.0/es +licenses/by-nd/3.0/fr +licenses/by-nd/3.0/ge +licenses/by-nd/3.0/gr +licenses/by-nd/3.0/gt +licenses/by-nd/3.0/hk +licenses/by-nd/3.0/hr +licenses/by-nd/3.0/ie +licenses/by-nd/3.0/igo +licenses/by-nd/3.0/it +licenses/by-nd/3.0/lu +licenses/by-nd/3.0/nl +licenses/by-nd/3.0/no +licenses/by-nd/3.0/nz +licenses/by-nd/3.0/ph +licenses/by-nd/3.0/pl +licenses/by-nd/3.0/pr +licenses/by-nd/3.0/pt +licenses/by-nd/3.0/ro +licenses/by-nd/3.0/rs +licenses/by-nd/3.0/sg +licenses/by-nd/3.0/th +licenses/by-nd/3.0/tw +licenses/by-nd/3.0/ug +licenses/by-nd/3.0/us +licenses/by-nd/3.0/ve +licenses/by-nd/3.0/vn +licenses/by-nd/3.0/za +licenses/by-sa/3.0 +licenses/by-sa/3.0/am +licenses/by-sa/3.0/at +licenses/by-sa/3.0/au +licenses/by-sa/3.0/az +licenses/by-sa/3.0/br +licenses/by-sa/3.0/ca +licenses/by-sa/3.0/ch +licenses/by-sa/3.0/cl +licenses/by-sa/3.0/cn +licenses/by-sa/3.0/cr +licenses/by-sa/3.0/cz +licenses/by-sa/3.0/de +licenses/by-sa/3.0/ec +licenses/by-sa/3.0/ee +licenses/by-sa/3.0/eg +licenses/by-sa/3.0/es +licenses/by-sa/3.0/fr +licenses/by-sa/3.0/ge +licenses/by-sa/3.0/gr +licenses/by-sa/3.0/gt +licenses/by-sa/3.0/hk +licenses/by-sa/3.0/hr +licenses/by-sa/3.0/ie +licenses/by-sa/3.0/igo +licenses/by-sa/3.0/it +licenses/by-sa/3.0/lu +licenses/by-sa/3.0/nl +licenses/by-sa/3.0/no +licenses/by-sa/3.0/nz +licenses/by-sa/3.0/ph +licenses/by-sa/3.0/pl +licenses/by-sa/3.0/pr +licenses/by-sa/3.0/pt +licenses/by-sa/3.0/ro +licenses/by-sa/3.0/rs +licenses/by-sa/3.0/sg +licenses/by-sa/3.0/th +licenses/by-sa/3.0/tw +licenses/by-sa/3.0/ug +licenses/by-sa/3.0/us +licenses/by-sa/3.0/ve +licenses/by-sa/3.0/vn +licenses/by-sa/3.0/za +licenses/by/4.0 +licenses/by-nc/4.0 +licenses/by-nc-nd/4.0 +licenses/by-nc-sa/4.0 +licenses/by-nd/4.0 +licenses/by-sa/4.0 +publicdomain/mark/1.0 +publicdomain/zero/1.0 diff --git a/env.example b/env.example index b58071a6..f44832f2 100644 --- a/env.example +++ b/env.example @@ -1,39 +1,28 @@ -## photos.py & photos_detail.py -# "The flickr developer guide: https://www.flickr.com/services/developer/" +# This file must be copied to .env and the appropriate variables populated. -# FLICKR_API_KEY = -# FLICKR_API_SECRET = +## GCS (Google Custom Search) -## deviantart_scratcher.py & google_scratcher.py +# https://developers.google.com/custom-search/v1/introduction # "Custom Search JSON API requires the use of an API key. An API key is a way # to identify your client to Google." 
-# https://developers.google.com/custom-search/v1/introduction +# +# https://googleapis.github.io/google-api-python-client/docs/epy/index.html +# "string, key obtained from https://code.google.com/apis/console" -# GOOGLE_API_KEYS = key1, key2 +# GCS_DEVELOPER_KEY = +# https://developers.google.com/custom-search/v1/reference/rest/v1/Search # "The identifier of an engine created using the Programmable Search Engine # Control Panel [https://programmablesearchengine.google.com/about/]" -# https://developers.google.com/custom-search/v1/reference/rest/v1/Search +# +# https://googleapis.github.io/google-api-python-client/docs/dyn/customsearch_v1.cse.html +# "string, The Programmable Search Engine ID to use for this request." -# PSE_KEY = +# GCS_CX = +## Flickr +# "The flickr developer guide: https://www.flickr.com/services/developer/" -## vimeo_scratcher.py -# "Before we set you loose on the API, we ask that you provide a little -# information about your app. An app in this sense can be a full-featured -# mobile application, a dynamic web page, or a three-line script. If it's -# making API calls, it's an app." -# https://developer.vimeo.com/api/guides/start#register-your-app - -# VIMEO_ACCESS_TOKEN = -# VIMEO_CLIENT_ID = - - -## youtube_scratcher.py -# "Every request must either specify an API key (with the key parameter) [...]. -# Your API key is available in the Developer Console's API Access pane -# [https://console.developers.google.com/] for your project." -# https://developers.google.com/youtube/v3/docs - -# YOUTUBE_API_KEY = +# FLICKR_API_KEY = +# FLICKR_API_SECRET = diff --git a/history.md b/history.md index 33047d5a..fff7e7e9 100644 --- a/history.md +++ b/history.md @@ -2,6 +2,16 @@ [stateof]: https://github.com/creativecommons/stateof +## 2024 Google Summer of Code + +- [Automating Quantifying the Commons: Part 1][part1] +- [Automating Quantifying the Commons: Part 2][part2] +- [Documentation][doc] + +[part1]: https://opensource.creativecommons.org/blog/entries/2024-07-10-automating-quantifying/ +[part2]: https://opensource.creativecommons.org/blog/entries/2024-08-22-automating-quantifying/ +[doc]: https://unmarred-gym-686.notion.site/Automating-Quantifying-the-Commons-Documentation-441056ae02364d8a9a51d5e820401db5?pvs=74 + ## 2022 Data Science Discovery - UC Berkeley diff --git a/pre-automation/deviantart/deviantart_scratcher.py b/pre-automation/deviantart/deviantart_scratcher.py index 1fedc6a6..62dede50 100755 --- a/pre-automation/deviantart/deviantart_scratcher.py +++ b/pre-automation/deviantart/deviantart_scratcher.py @@ -20,9 +20,13 @@ import quantify # noqa: E402 # Setup paths, Date and LOGGER using quantify.setup() -PATH_REPO_ROOT, PATH_WORK_DIR, PATH_DOTENV, DATETIME_TODAY, LOGGER = ( - quantify.setup(__file__) -) +( + PATH_REPO_ROOT, + PATH_WORK_DIR, + PATH_DOTENV, + DATETIME_TODAY, + LOGGER, +) = quantify.setup(__file__) # Load environment variables load_dotenv(PATH_DOTENV) diff --git a/pre-automation/google_custom_search/google_scratcher.py b/pre-automation/google_custom_search/google_scratcher.py index 0bd05905..fe942851 100755 --- a/pre-automation/google_custom_search/google_scratcher.py +++ b/pre-automation/google_custom_search/google_scratcher.py @@ -21,9 +21,13 @@ import quantify # noqa: E402 # Setup paths, Date and LOGGER using quantify.setup() -PATH_REPO_ROOT, PATH_WORK_DIR, PATH_DOTENV, DATETIME_TODAY, LOGGER = ( - quantify.setup(__file__) -) +( + PATH_REPO_ROOT, + PATH_WORK_DIR, + PATH_DOTENV, + DATETIME_TODAY, + LOGGER, +) = quantify.setup(__file__) # Load 
environment variables load_dotenv(PATH_DOTENV) diff --git a/pyproject.toml b/pyproject.toml index 4caba1f9..bf1bad8f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ line_length = 79 multi_line_output = 3 no_lines_before = 'LOCALFOLDER' profile = 'black' -src_paths = ['pre-automation'] +src_paths = ['pre-automation', 'scripts'] # [tool.pre-commit] diff --git a/scripts/1-fetch/deviantart_fetched.py b/scripts/1-fetch/deviantart_fetched.py new file mode 100644 index 00000000..c7c13974 --- /dev/null +++ b/scripts/1-fetch/deviantart_fetched.py @@ -0,0 +1,289 @@ +#!/usr/bin/env python +""" +This file is dedicated to querying data from the DeviantArt API. +""" + +# Standard library +import argparse +import csv +import os +import sys +import traceback + +# Third-party +import pandas as pd +import requests +import yaml +from dotenv import load_dotenv +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# Load environment variables +load_dotenv(PATHS["dotenv"]) + +# Global Variable for API_KEYS indexing +API_KEYS_IND = 0 + +# Gets API_KEYS and PSE_KEY from .env file +API_KEYS = os.getenv("GOOGLE_API_KEYS").split(",") +PSE_KEY = os.getenv("PSE_KEY") + +# Log the start of the script execution +LOGGER.info("Script execution started.") + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. + """ + LOGGER.info("Parsing command-line arguments") + parser = argparse.ArgumentParser( + description="DeviantArt Data Fetching Script" + ) + parser.add_argument( + "--licenses", type=int, default=10, help="Number of licenses to query" + ) + return parser.parse_args() + + +def set_up_data_file(): + """ + Sets up the data file for recording results. + """ + LOGGER.info("Setting up the data file for recording results.") + header = "LICENSE TYPE,Document Count\n" + with open( + os.path.join(PATHS["data_phase"], "deviantart_fetched.csv"), "w" + ) as f: + f.write(header) + + +def get_license_list(): + """ + Provides the list of licenses from Creative Commons. + + Returns: + list: A list containing all license types that should be searched. + """ + LOGGER.info("Retrieving list of licenses from Creative Commons' record.") + cc_license_data = pd.read_csv( + os.path.join(PATHS["repo"], "legal-tool-paths.txt"), header=None + ) + license_pattern = r"((?:[^/]+/){2}(?:[^/]+)).*" + license_list = ( + cc_license_data[0] + .str.extract(license_pattern, expand=False) + .dropna() + .unique() + ) + return license_list + + +def get_request_url(license_type): + """ + Provides the API Endpoint URL for a specified license type. + + Args: + license_type: A string representing the type of license. + + Returns: + str: The API Endpoint URL for the query specified by parameters. 
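+ +        Example of the returned URL shape (illustrative placeholders; +        the real values come from GOOGLE_API_KEYS and PSE_KEY): +        https://customsearch.googleapis.com/customsearch/v1?key=<KEY> +        &cx=<PSE_KEY>&q=_&relatedSite=deviantart.com +        &linkSite=creativecommons.org<URL-encoded license path>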
+ """ + LOGGER.info(f"Generating API Endpoint URL for license: {license_type}") + try: + api_key = API_KEYS[API_KEYS_IND] + return ( + "https://customsearch.googleapis.com/customsearch/v1" + f"?key={api_key}&cx={PSE_KEY}" + "&q=_&relatedSite=deviantart.com" + f'&linkSite=creativecommons.org{license_type.replace("/", "%2F")}' + ) + except IndexError: + LOGGER.error("Depleted all API Keys provided") + raise shared.QuantifyingException("No API keys left to use", 1) + + +def get_response_elems(license_type): + """ + Retrieves the number of documents for the specified license type. + + Args: + license_type: A string representing the type of license. + + Returns: + dict: A dictionary containing the total document count. + """ + LOGGER.info(f"Querying metadata for license: {license_type}") + try: + request_url = get_request_url(license_type) + max_retries = Retry( + total=5, + backoff_factor=10, + status_forcelist=[403, 408, 500, 502, 503, 504], + ) + session = requests.Session() + session.mount("https://", HTTPAdapter(max_retries=max_retries)) + with session.get(request_url) as response: + response.raise_for_status() + search_data = response.json() + return { + "totalResults": search_data["searchInformation"]["totalResults"] + } + except requests.exceptions.HTTPError as e: + global API_KEYS_IND + API_KEYS_IND += 1 + LOGGER.error(f"HTTP Error: {e}. Switching to the next API key.") + if API_KEYS_IND < len(API_KEYS): + return get_response_elems(license_type) + else: + raise shared.QuantifyingException( + f"HTTP Error: {e}. No API keys left.", 1 + ) + except requests.RequestException as e: + LOGGER.error(f"Request Exception: {e}") + raise shared.QuantifyingException(f"Request Exception: {e}", 1) + except KeyError as e: + LOGGER.error(f"KeyError: {e}.") + raise shared.QuantifyingException(f"KeyError: {e}", 1) + + +def retrieve_license_data(args): + """ + Retrieves the data of all license types specified. + + Args: + args: Parsed command-line arguments. + + Returns: + int: The total number of documents retrieved. + """ + LOGGER.info("Retrieving the data for all license types.") + licenses = get_license_list()[: args.licenses] + + # data = [] + total_docs_retrieved = 0 + + for license_type in licenses: + data_dict = get_response_elems(license_type) + total_docs_retrieved += int(data_dict["totalResults"]) + record_results(license_type, data_dict) + + return total_docs_retrieved + + +def record_results(license_type, data): + """ + Records the data for a specific license type into the CSV file. + + Args: + license_type: The license type. + data: A dictionary containing the data to record. + """ + LOGGER.info(f"Recording data for license: {license_type}") + row = [license_type, data["totalResults"]] + with open( + os.path.join(PATHS["data_phase"], "deviantart_fetched.csv"), + "a", + newline="", + ) as f: + writer = csv.writer(f) + writer.writerow(row) + + +def load_state(): + """ + Loads the state from a YAML file, returns the last recorded state. + + Returns: + dict: The last recorded state. + """ + if os.path.exists(PATHS["state"]): + with open(PATHS["state"], "r") as f: + return yaml.safe_load(f) + return {"total_records_retrieved (deviantart)": 0} + + +def save_state(state: dict): + """ + Saves the state to a YAML file. + + Args: + state: The state dictionary to save. 
+ """ + with open(PATHS["state"], "w") as f: + yaml.safe_dump(state, f) + + +def main(): + + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + args = parse_arguments() + + state = load_state() + total_docs_retrieved = state["total_records_retrieved (deviantart)"] + LOGGER.info(f"Initial total_records_retrieved: {total_docs_retrieved}") + goal_documents = 1000 # Set goal number of documents + + if total_docs_retrieved >= goal_documents: + LOGGER.info( + f"Goal of {goal_documents} documents already achieved." + " No further action required." + ) + return + + # Log the paths being used + shared.log_paths(LOGGER, PATHS) + + # Create data directory for this phase + os.makedirs(PATHS["data_phase"], exist_ok=True) + + if total_docs_retrieved == 0: + set_up_data_file() + + # Retrieve and record data + docs_retrieved = retrieve_license_data(args) + + # Update the state with the new count of retrieved records + total_docs_retrieved += docs_retrieved + LOGGER.info( + f"Total documents retrieved after fetching: {total_docs_retrieved}" + ) + state["total_records_retrieved (deviantart)"] = total_docs_retrieved + save_state(state) + + # Add and commit changes + shared.add_and_commit(PATHS["repo"], "Added and committed DeviantArt data") + + # Push changes + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/1-fetch/flickr_fetched.py b/scripts/1-fetch/flickr_fetched.py new file mode 100755 index 00000000..8bcf2aff --- /dev/null +++ b/scripts/1-fetch/flickr_fetched.py @@ -0,0 +1,393 @@ +#!/usr/bin/env python +""" +Script to fetch photo information from Flickr API, process the data, +and save it into multiple CSV files and a JSON file. +""" + +# Standard library +import argparse +import csv +import json +import os +import sys +import time +import traceback + +# Third-party +import flickrapi +import pandas as pd +from dotenv import load_dotenv + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup paths, and LOGGER using quantify.setup() +LOGGER, PATHS = shared.setup(__file__) + +# Load environment variables +load_dotenv(PATHS["dotenv"]) + +# Global variable: Number of retries for error handling +RETRIES = 0 + +# Log the start of the script execution +LOGGER.info("Script execution started.") + +# PATHS["data_phase"], "flickr_fetched", + +# Flickr API rate limits +FLICKR_API_CALLS_PER_HOUR = 3600 +SECONDS_PER_HOUR = 3600 +API_CALL_INTERVAL = SECONDS_PER_HOUR / FLICKR_API_CALLS_PER_HOUR + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. 
+ """ + LOGGER.info("Parsing command-line arguments") + parser = argparse.ArgumentParser(description="Google Custom Search Script") + parser.add_argument( + "--records", type=int, default=1, help="Number of records per query" + ) + parser.add_argument( + "--pages", type=int, default=1, help="Number of pages to query" + ) + parser.add_argument( + "--licenses", type=int, default=1, help="Number of licenses to query" + ) + return parser.parse_args() + + +def to_df(datalist, namelist): + """ + Transform data into a DataFrame. + + Args: + - datalist (list): List of lists containing data. + - namelist (list): List of column names. + + Returns: + - df (DataFrame): DataFrame constructed from the data. + """ + LOGGER.info("Transforming data into a DataFrame.") + df = pd.DataFrame(datalist).transpose() + df.columns = namelist + return df + + +def df_to_csv(temp_list, name_list, temp_csv, final_csv): + """ + Save data to temporary CSV and then merge it with final CSV. + + Args: + - temp_list (list): csv that is used for saving data every 100 seconds. + - name_list (list): List of column names. + - temp_csv (str): Temporary CSV file path. + - final_csv (str): Final CSV file path. + """ + LOGGER.info("Saving data to temporary CSV and merging with final CSV.") + df = to_df(temp_list, name_list) + df.to_csv(temp_csv, index=False) + # Merge temporary CSV with final CSV, ignoring index to avoid duplication + if os.path.exists(final_csv): + df_final = pd.read_csv(final_csv) + df = pd.concat([df_final, df], ignore_index=True) + df.to_csv(final_csv, index=False) + + +def creat_lisoflis(size): + """ + Create one list of list [[],[],[]] to save all the columns with + each column as a list. + + Args: + - size (int): Size of the list of lists. + + Returns: + - temp_list (list): List of empty lists. + """ + LOGGER.info("Creating list of lists for data storage.") + temp_list = [[] for _ in range(size)] + return temp_list + + +def clean_saveas_csv(old_csv_str, new_csv_str): + """ + Clean empty columns and save CSV to a new file. + + Args: + - old_csv_str (str): Path to the old CSV file. + - new_csv_str (str): Path to the new CSV file. + """ + LOGGER.info("Cleaning empty columns and saving CSV to a new file.") + data = pd.read_csv(old_csv_str, low_memory=False) + data = data.loc[:, ~data.columns.str.contains("^Unnamed")] + data.to_csv(new_csv_str, index=False) + + +def query_helper1(raw, part, detail, temp_list, index): + """ + Helper function 1 for querying data. + + Args: + - raw (dict): Raw data from API. + - part (str): Part of the data. + - detail (str): Detail to be queried. + - temp_list (list): List to store queried data. + - index (int): Index of the data in temp_list. + """ + queried_raw = raw["photo"][part][detail] + temp_list[index].append(queried_raw) + + +def query_helper2(raw, part, temp_list, index): + """ + Helper function 2 for querying data. + + Args: + - raw (dict): Raw data from API. + - part (str): Part of the data. + - temp_list (list): List to store queried data. + - index (int): Index of the data in temp_list. + """ + queried_raw = raw["photo"][part] + temp_list[index].append(queried_raw) + + +def query_data(raw_data, name_list, data_list): + """ + Query useful data from raw pulled data and store it in lists. + + Args: + - raw_data (dict): Raw data from API. + - name_list (list): List of column names. + - data_list (list): List of lists to store data. + """ + LOGGER.info( + "Querying useful data from raw pulled data and storing it in lists." 
+ ) + for a in range(len(name_list)): + if (0 <= a < 4) or a == 9: + query_helper2(raw_data, name_list[a], data_list, a) + elif a in [4, 5]: + query_helper1(raw_data, "owner", name_list[a], data_list, a) + elif a in [6, 7, 10]: + query_helper1(raw_data, name_list[a], "_content", data_list, a) + elif a == 8: + query_helper1(raw_data, "dates", "taken", data_list, a) + if a == 11: + tags = raw_data["photo"]["tags"]["tag"] + data_list[a].append([tag["raw"] for tag in tags] if tags else []) + + +def page1_reset(final_csv, raw_data): + """ + Reset page count and update total picture count. + + Args: + - final_csv (str): Path to the final CSV file. + - raw_data (dict): Raw data from API call. + + Returns: + - int: Total number of pages. + """ + LOGGER.info("Resetting page count and updating total picture count.") + if os.path.exists(final_csv): + data = pd.read_csv(final_csv, low_memory=False) + data.drop(data.columns, axis=1, inplace=True) + data.to_csv(final_csv, index=False) + return raw_data["photos"]["pages"] + + +def handle_rate_limiting(): + """ + Handle rate limiting by pausing execution + to avoid hitting the API rate limit. + """ + LOGGER.info( + f"Sleeping for {API_CALL_INTERVAL} seconds to handle rate limiting." + ) + time.sleep(API_CALL_INTERVAL) + + +def process_data(): + final_csv_path = os.path.join( + PATHS["data_phase"], "flickr_fetched", "final.csv" + ) + record_txt_path = os.path.join( + PATHS["data_phase"], "flickr_fetched", "rec.txt" + ) + hs_csv_path = os.path.join(PATHS["data_phase"], "flickr_fetched", "hs.csv") + + # Ensure files exist + if not os.path.exists(record_txt_path): + with open(record_txt_path, "w") as f: + f.write("1 1 1") # Start from page 1, license 1, total pages 1 + + if not os.path.exists(final_csv_path): + with open(final_csv_path, "w") as f: + pass # Create an empty final.csv + + if not os.path.exists(hs_csv_path): + with open(hs_csv_path, "w") as f: + pass # Create an empty hs.csv + + flickr = flickrapi.FlickrAPI( + os.getenv("FLICKR_API_KEY"), + os.getenv("FLICKR_API_SECRET"), + format="json", + ) + license_list = [1, 2, 3, 4, 5, 6, 9, 10] + name_list = [ + "id", + "dateuploaded", + "isfavorite", + "license", + "realname", + "location", + "title", + "description", + "dates", + "views", + "comments", + "tags", + ] + temp_list = creat_lisoflis(len(name_list)) + + # Dictionary to store photo data for each Creative Commons license + photo_data_dict = {license_num: [] for license_num in license_list} + + with open(record_txt_path) as f: + readed = f.read().split(" ") + j = int(readed[0]) + i = int(readed[1]) + total = int(readed[2]) + + while i in license_list: + while j <= total: + try: + photosJson = flickr.photos.search( + license=i, per_page=100, page=j + ) + handle_rate_limiting() + photos = json.loads(photosJson.decode("utf-8")) + id_list = [x["id"] for x in photos["photos"]["photo"]] + + if j == 1: + total = page1_reset(final_csv_path, photos) + + for index in range(len(id_list)): + detailJson = flickr.photos.getInfo( + license=i, photo_id=id_list[index] + ) + handle_rate_limiting() + photos_detail = json.loads(detailJson.decode("utf-8")) + LOGGER.info( + f"{index} id out of {len(id_list)} in " + f"license {i}, page {j} out of {total}" + ) + query_data(photos_detail, name_list, temp_list) + photo_data_dict[i].append(photos_detail) + + j += 1 + LOGGER.info( + f"Page {j} out of {total} in license " + f"{i} with retry number {RETRIES}" + ) + df_to_csv(temp_list, name_list, hs_csv_path, final_csv_path) + with open(record_txt_path, "w") as f: + 
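+                # rec.txt checkpoint format: "<page> <license> <total_pages>"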
f.write(f"{j} {i} {total}") + temp_list = creat_lisoflis(len(name_list)) + + if j > total: + license_i_path = os.path.join( + PATHS["data_phase"], + "flickr_fetched", + f"cleaned_license{i}.csv", + ) + clean_saveas_csv(final_csv_path, license_i_path) + i += 1 + j = 1 + while i not in license_list: + i += 1 + with open(record_txt_path, "w") as f: + f.write(f"{j} {i} {total}") + temp_list = creat_lisoflis(len(name_list)) + break + + except flickrapi.exceptions.FlickrError as e: + if "rate limit" in str(e).lower(): + LOGGER.warning("Rate limit reached, sleeping for an hour.") + time.sleep(SECONDS_PER_HOUR) + continue + else: + LOGGER.error(f"Flickr API error: {e}") + raise + + # Save the dictionary containing photo data to a JSON file + with open( + os.path.join(PATHS["data_phase"], "flickr_fetched", "photos.json"), "w" + ) as json_file: + json.dump(photo_data_dict, json_file) + + +def save_license_totals(): + LOGGER.info("Saving license totals.") + license_counts = {} + for i in [1, 2, 3, 4, 5, 6, 9, 10]: + df = pd.read_csv( + os.path.join( + PATHS["data_phase"], + "flickr_fetched", + f"cleaned_license{i}.csv", + ) + ) + license_counts[i] = len(df) + + license_total_path = os.path.join( + PATHS["data_phase"], "flickr_fetched", "license_total.csv" + ) + with open(license_total_path, "w") as csvfile: + writer = csv.writer(csvfile) + writer.writerow(["License", "Total"]) + for license, total in license_counts.items(): + writer.writerow([license, total]) + + +def main(): + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + process_data() + save_license_totals() + LOGGER.info("Script execution completed successfully.") + + # Add and commit changes + shared.add_and_commit(PATHS["repo"], "Added and committed new reports") + + # Push changes + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/1-fetch/gcs_fetched.py b/scripts/1-fetch/gcs_fetched.py new file mode 100755 index 00000000..2a9792e4 --- /dev/null +++ b/scripts/1-fetch/gcs_fetched.py @@ -0,0 +1,401 @@ +#!/usr/bin/env python +""" +This file is dedicated to querying data from the Google Custom Search API. +""" +# Standard library +import argparse +import csv +import os +import re +import sys +import time +import traceback + +# import time +import urllib.parse + +# Third-party +import googleapiclient.discovery +import yaml +from dotenv import load_dotenv +from googleapiclient.errors import HttpError + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# Load environment variables +load_dotenv(PATHS["dotenv"]) + +# Constants +DEVELOPER_KEY = os.getenv("GCS_DEVELOPER_KEY") +CX = os.getenv("GCS_CX") +BASE_URL = "https://www.googleapis.com/customsearch/v1" + +# Log the start of the script execution +LOGGER.info("Script execution started.") + + +def get_search_service(): + """ + Creates and returns the Google Custom Search API service. 
+ """ + LOGGER.info("Getting Google Custom Search API Service.") + return googleapiclient.discovery.build( + "customsearch", "v1", developerKey=DEVELOPER_KEY, cache_discovery=False + ) + + +def fetch_results( + args, service, start_index: int, cr=None, lr=None, link_site=None +) -> int: + """ + Fetch search results from Google Custom Search API. + Returns the total number of search results. + """ + LOGGER.info( + "Fetching and returning number of search results " + "from Google Custom Search API" + ) + records_per_query = args.records + max_retries = 5 + initial_delay = 1 # in seconds + + LOGGER.info(f"Records per query: {records_per_query}") + + for attempt in range(max_retries): + try: + # Added initial query_params parameter for logging purposes + query_params = { + "cx": CX, + "num": records_per_query, + "start": start_index, + "cr": cr, + "lr": lr, + "q": link_site, + } + # Filter out None values + query_params = { + k: v for k, v in query_params.items() if v is not None + } + + LOGGER.info(f"Query Parameters: {query_params}") + + results = service.cse().list(**query_params).execute() + + total_results = int( + results.get("searchInformation", {}).get("totalResults", 0) + ) + LOGGER.info(f"Total Results: {total_results}") + return total_results + + except HttpError as e: + if e.status_code == 429: + LOGGER.warning( + f"{e.status_code}: {e.reason}. retrying in {initial_delay}" + " seconds" + ) + time.sleep(initial_delay) + initial_delay *= 2 # Exponential backoff + else: + LOGGER.error(f"Error fetching results: {e}") + return 0 + LOGGER.error("Max tries exceeded. Could not complete the request.") + return 0 + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. + """ + LOGGER.info("Parsing command-line arguments") + parser = argparse.ArgumentParser(description="Google Custom Search Script") + parser.add_argument( + "--records", type=int, default=1, help="Number of records per query" + ) + parser.add_argument( + "--pages", type=int, default=1, help="Number of pages to query" + ) + parser.add_argument( + "--licenses", type=int, default=1, help="Number of licenses to query" + ) + return parser.parse_args() + + +def set_up_data_file(): + """ + Sets up the data files for recording results. + Results are currently grouped by location (country) and language + """ + LOGGER.info("Setting up the data files for recording results.") + header = ( + "LICENSE TYPE, No Priori, United States, Canada, " + "India, United Kingdom, Australia, Japan, " + "English, Spanish, French, Arabic, " + "Chinese (Simplified), Indonesian\n" + # "LICENSE TYPE,No Priori,Australia,Brazil,Canada,Egypt," + # "Germany,India,Japan,Spain," + # "United Kingdom,United States,Arabic," + # "Chinese (Simplified),Chinese (Traditional)," + # "English,French,Indonesian,Portuguese,Spanish\n" + ) + # open 'w' = open a file for writing + with open(os.path.join(PATHS["data_phase"], "gcs_fetched.csv"), "w") as f: + f.write(header) + + +# State Management +def load_state(): + """ + Loads the state from a JSON file, returns the last fetched start index. + """ + if os.path.exists(PATHS["state"]): + with open(PATHS["state"], "r") as f: + return yaml.safe_load(f) + return {"total_records_retrieved": 0} + + +def save_state(state: dict): + """ + Saves the state to a JSON file. + Parameters: + state_file: Path to the state file. + start_index: Last fetched start index. 
+ """ + with open(PATHS["state"], "w") as f: + yaml.safe_dump(state, f) + + +def get_license_list(args): + """ + Provides the list of licenses from Creative Commons. + + Returns: + - np.array: + An np array containing all license types that should be searched + via Programmable Search Engine (PSE). + """ + LOGGER.info("Providing the list of licenses from Creative Commons") + license_list = [] + with open( + os.path.join(PATHS["data"], "legal-tool-paths.txt"), "r" + ) as file: + for line in file: + line = ( + line.strip() + ) # Strip newline and whitespace characters from the line + match = re.search(r"((?:[^/]+/){2}(?:[^/]+)).*", line) + if match: + license_list.append( + f"https://creativecommons.org/{match.group(1)}" + ) + return list(set(license_list))[ + : args.licenses + ] # Only the first license for testing + # Change [:1] to [args.licenses] later, to limit based on args + + +def get_country_list(select_all=False): + """ + Provides the list of countries to find Creative Commons usage data on. + LISTED BY API COUNTRY CODE + """ + LOGGER.info("Providing the list of countries to find CC usage data on.") + # countries = [] + # with open( + # os.path.join(PATHS["data"], "google_countries.tsv"), "r" + # ) as file: + # for line in file: + # country = line.strip().split("\t")[0] + # country = country.replace(",", " ") + # countries.append(country) + + # if select_all: + # return sorted(countries) + + # selected_countries = [ + # "India", + # "Japan", + # "United States", + # "Canada", + # "Brazil", + # "Germany", + # "United Kingdom", + # "Spain", + # "Australia", + # "Egypt", + # ] + # return sorted( + # [country for country in countries if country in selected_countries] + # ) + + # Commented out for testing purposes + return ["US", "CA", "IN", "UK", "AU", "JP"] + + +def get_lang_list(): + """ + Provides the list of languages to find Creative Commons usage data on. + LISTED BY API LANGUAGE ABBREVIATION + """ + LOGGER.info("Providing the list of languages to find CC usage data on.") + # languages = [] + # with open( + # os.path.join(PATHS["data"], "google_lang.txt"), "r" + # ) as file: + # for line in file: + # match = re.search(r'"([^"]+)"', line) + # if match: + # languages.append(match.group(1)) + + # selected_languages = [ + # "Arabic", + # "Chinese (Simplified)", + # "Chinese (Traditional)", + # "English", + # "French", + # "Indonesian", + # "Portuguese", + # "Spanish", + # ] + # return sorted([lang for lang in languages if lang in selected_languages]) + + # Commented out for testing purposes + return ["en", "es", "fr", "ar", "zh-CH", "id"] + + +def retrieve_license_data(args, service, license_list): + """ + Retrieves the data of all license types. 
+ """ + LOGGER.info("Retrieving the data of all license types.") + selected_countries = get_country_list() + selected_languages = get_lang_list() + + data = [] + + for license_type in license_list: + encoded_license = urllib.parse.quote(license_type, safe=":/") + row = [license_type] + no_priori_search = fetch_results( + args, service, start_index=1, link_site=encoded_license + ) + row.append(no_priori_search) + + for country in selected_countries: + country_data = fetch_results( + args, + service, + start_index=1, + cr=f"country{country}", + link_site=encoded_license, + ) + row.append(country_data) + + for language in selected_languages: + language_data = fetch_results( + args, + service, + start_index=1, + lr=f"lang_{language}", + link_site=encoded_license, + ) + row.append(language_data) + + data.append(row) + + # Print the collected data for debugging + # Data Row Format: [License, No_Priori, United States, English] + for row in data: + LOGGER.info(f"Collected data row: {row}") + + return data + + +def record_results(results): + """ + Records the search results into the CSV file. + """ + LOGGER.info("Recording the search results into the CSV file.") + # open 'a' = Open for appending at the end of the file without truncating + with open( + os.path.join(PATHS["data_phase"], "gcs_fetched.csv"), "a", newline="" + ) as f: + writer = csv.writer(f) + for result in results: + writer.writerow(result) + + +def main(): + + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + args = parse_arguments() + state = load_state() + total_records_retrieved = state["total_records_retrieved"] + LOGGER.info(f"Initial total_records_retrieved: {total_records_retrieved}") + goal_records = 1000 # Set goal number of records + + if total_records_retrieved >= goal_records: + LOGGER.info( + f"Goal of {goal_records} records already achieved." + "No further action required." + ) + return + + shared.log_paths(LOGGER, PATHS) + + # Create data directory for this phase + os.makedirs(PATHS["data_phase"], exist_ok=True) + + if total_records_retrieved == 0: + set_up_data_file() + + service = get_search_service() + license_list = get_license_list(args) + + data = retrieve_license_data(args, service, license_list) + LOGGER.info(f"Final Data: {data}") + record_results(data) + + # Save the state checkpoint after fetching + total_records_retrieved += sum( + len(row) - 1 for row in data + ) # Exclude license type row + LOGGER.info( + f"total_records_retrieved after fetching: {total_records_retrieved}" + ) + state["total_records_retrieved"] = total_records_retrieved + save_state(state) + + # Add and commit changes + shared.add_and_commit(PATHS["repo"], "Added and committed new reports") + + # Push changes + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/1-fetch/github_fetched.py b/scripts/1-fetch/github_fetched.py new file mode 100644 index 00000000..7782a1c6 --- /dev/null +++ b/scripts/1-fetch/github_fetched.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python +""" +This file is dedicated to querying data from the GitHub API. 
+""" + +# Standard library +import argparse +import csv +import os +import sys +import traceback + +# Third-party +import requests +import yaml +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# Log the start of the script execution +LOGGER.info("Script execution started.") + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. + """ + LOGGER.info("Parsing command-line arguments") + parser = argparse.ArgumentParser(description="GitHub Data Fetching Script") + parser.add_argument( + "--licenses", type=int, default=3, help="Number of licenses to query" + ) + return parser.parse_args() + + +def set_up_data_file(): + """ + Sets up the data file for recording results. + """ + LOGGER.info("Setting up the data file for recording results.") + header = "LICENSE_TYPE,Repository Count\n" + with open( + os.path.join(PATHS["data_phase"], "github_fetched.csv"), "w" + ) as f: + f.write(header) + + +def get_response_elems(license_type): + """ + Provides the metadata for a query of + specified license type from GitHub API. + + Args: + license_type: A string representing the type of license. + Returns: + dict: A dictionary mapping metadata + to its value provided from the API query. + """ + LOGGER.info(f"Querying metadata for license: {license_type}") + try: + base_url = "https://api.github.com/search/repositories?q=license:" + request_url = f"{base_url}{license_type}" + max_retries = Retry( + total=5, + backoff_factor=10, + status_forcelist=[403, 408, 429, 500, 502, 503, 504], + ) + session = requests.Session() + session.mount("https://", HTTPAdapter(max_retries=max_retries)) + with session.get(request_url) as response: + response.raise_for_status() + search_data = response.json() + return {"totalResults": search_data["total_count"]} + except requests.HTTPError as e: + LOGGER.error(f"HTTP Error: {e}") + raise shared.QuantifyingException(f"HTTP Error: {e}", 1) + except requests.RequestException as e: + LOGGER.error(f"Request Exception: {e}") + raise shared.QuantifyingException(f"Request Exception: {e}", 1) + except KeyError as e: + LOGGER.error(f"KeyError: {e}.") + raise shared.QuantifyingException(f"KeyError: {e}", 1) + + +def retrieve_license_data(args): + """ + Retrieves the data of all license types specified. + """ + LOGGER.info("Retrieving the data for all license types.") + licenses = ["CC0-1.0", "CC-BY-4.0", "CC-BY-SA-4.0"][: args.licenses] + + data = [] + total_repos_retrieved = 0 + + for license_type in licenses: + data_dict = get_response_elems(license_type) + total_repos_retrieved += data_dict["totalResults"] + record_results(license_type, data_dict) + + for row in data: + LOGGER.info(f"Collected data row: {row}") + + return data + + +def record_results(license_type, data): + """ + Records the data for a specific license type into the CSV file. + """ + LOGGER.info(f"Recording data for license: {license_type}") + row = [license_type, data["totalResults"]] + with open( + os.path.join(PATHS["data_phase"], "github_fetched.csv"), + "a", + newline="", + ) as f: + writer = csv.writer(f) + writer.writerow(row) + + +def load_state(): + """ + Loads the state from a YAML file, returns the last recorded state. 
+ """ + if os.path.exists(PATHS["state"]): + with open(PATHS["state"], "r") as f: + return yaml.safe_load(f) + return {"total_records_retrieved (github)": 0} + + +def save_state(state: dict): + """ + Saves the state to a YAML file. + Parameters: + state_file: Path to the state file. + state: The state dictionary to save. + """ + with open(PATHS["state"], "w") as f: + yaml.safe_dump(state, f) + + +def main(): + + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + args = parse_arguments() + + state = load_state() + total_records_retrieved = state["total_records_retrieved (github)"] + LOGGER.info(f"Initial total_records_retrieved: {total_records_retrieved}") + goal_records = 1000 # Set goal number of records + + if total_records_retrieved >= goal_records: + LOGGER.info( + f"Goal of {goal_records} records already achieved." + " No further action required." + ) + return + + # Log the paths being used + shared.log_paths(LOGGER, PATHS) + + # Create data directory for this phase + os.makedirs(PATHS["data_phase"], exist_ok=True) + + if total_records_retrieved == 0: + set_up_data_file() + + # Retrieve and record data + repos_retrieved = retrieve_license_data(args) + + # Update the state with the new count of retrieved records + total_records_retrieved += repos_retrieved + LOGGER.info( + f"Total records retrieved after fetching: {total_records_retrieved}" + ) + state["total_records_retrieved (github)"] = total_records_retrieved + save_state(state) + + # Add and commit changes + shared.add_and_commit(PATHS["repo"], "Added and committed GitHub data") + + # Push changes + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/1-fetch/internetarchive_fetched.py b/scripts/1-fetch/internetarchive_fetched.py new file mode 100644 index 00000000..a6af6780 --- /dev/null +++ b/scripts/1-fetch/internetarchive_fetched.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python +""" +This file is dedicated to querying data from the Internet Archive API. +""" + +# Standard library +import argparse +import csv +import os +import sys +import traceback + +# Third-party +import pandas as pd +import yaml +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +# First-party/Local +from internetarchive.search import Search +from internetarchive.session import ArchiveSession + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# Log the start of the script execution +LOGGER.info("Script execution started.") + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. 
+ """ + LOGGER.info("Parsing command-line arguments") + parser = argparse.ArgumentParser( + description="Internet Archive Data Fetching Script" + ) + parser.add_argument( + "--licenses", type=int, default=10, help="Number of licenses to query" + ) + return parser.parse_args() + + +def set_up_data_file(): + """ + Sets up the data file for recording results. + """ + LOGGER.info("Setting up the data file for recording results.") + header = "LICENSE TYPE,Document Count\n" + with open( + os.path.join(PATHS["data_phase"], "internetarchive_fetched.csv"), "w" + ) as f: + f.write(header) + + +def get_license_list(): + """ + Provides the list of licenses from a Creative Commons provided tool list. + + Returns: + list: A list containing all license types that + should be searched from Internet Archive. + """ + LOGGER.info("Retrieving list of licenses from Creative Commons' record.") + cc_license_data = pd.read_csv( + os.path.join(PATHS["repo"], "legal-tool-paths.txt"), header=None + ) + license_pattern = r"((?:[^/]+/){2}(?:[^/]+)).*" + license_list = ( + cc_license_data[0] + .str.extract(license_pattern, expand=False) + .dropna() + .unique() + ) + return license_list + + +def get_response_elems(license_type): + """ + Retrieves the number of documents for the + specified license type from Internet Archive. + + Args: + license_type: A string representing the type of license. + + Returns: + dict: A dictionary containing the total document count. + """ + LOGGER.info(f"Querying metadata for license: {license_type}") + try: + max_retries = Retry( + total=5, + backoff_factor=10, + status_forcelist=[403, 408, 429, 500, 502, 503, 504], + ) + search_session = ArchiveSession() + search_session.mount_http_adapter( + protocol="https://", + max_retries=HTTPAdapter(max_retries=max_retries), + ) + search_data = Search( + search_session, + f'/metadata/licenseurl:("http://creativecommons.org/' + f'{license_type}")', + ) + return {"totalResults": len(search_data)} + except Exception as e: + LOGGER.error(f"Error fetching data for license: {license_type}: {e}") + raise shared.QuantifyingException(f"Error fetching data: {e}", 1) + + +def retrieve_license_data(args): + """ + Retrieves the data of all license types specified. + + Args: + args: Parsed command-line arguments. + + Returns: + int: The total number of documents retrieved. + """ + LOGGER.info("Retrieving the data for all license types.") + licenses = get_license_list()[: args.licenses] + + # data = [] + total_docs_retrieved = 0 + + for license_type in licenses: + data_dict = get_response_elems(license_type) + total_docs_retrieved += int(data_dict["totalResults"]) + record_results(license_type, data_dict) + + return total_docs_retrieved + + +def record_results(license_type, data): + """ + Records the data for a specific license type into the CSV file. + + Args: + license_type: The license type. + data: A dictionary containing the data to record. + """ + LOGGER.info(f"Recording data for license: {license_type}") + row = [license_type, data["totalResults"]] + with open( + os.path.join(PATHS["data_phase"], "internetarchive_fetched.csv"), + "a", + newline="", + ) as f: + writer = csv.writer(f) + writer.writerow(row) + + +def load_state(): + """ + Loads the state from a YAML file, returns the last recorded state. + + Returns: + dict: The last recorded state. 
+ """ + if os.path.exists(PATHS["state"]): + with open(PATHS["state"], "r") as f: + return yaml.safe_load(f) + return {"total_records_retrieved (internet archive)": 0} + + +def save_state(state: dict): + """ + Saves the state to a YAML file. + + Args: + state: The state dictionary to save. + """ + with open(PATHS["state"], "w") as f: + yaml.safe_dump(state, f) + + +def main(): + + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + args = parse_arguments() + + state = load_state() + total_docs_retrieved = state["total_records_retrieved (internet archive)"] + LOGGER.info(f"Initial total_records_retrieved: {total_docs_retrieved}") + goal_documents = 1000 # Set goal number of documents + + if total_docs_retrieved >= goal_documents: + LOGGER.info( + f"Goal of {goal_documents} documents already achieved." + " No further action required." + ) + return + + # Log the paths being used + shared.log_paths(LOGGER, PATHS) + + # Create data directory for this phase + os.makedirs(PATHS["data_phase"], exist_ok=True) + + if total_docs_retrieved == 0: + set_up_data_file() + + # Retrieve and record data + docs_retrieved = retrieve_license_data(args) + + # Update the state with the new count of retrieved records + total_docs_retrieved += docs_retrieved + LOGGER.info( + f"Total documents retrieved after fetching: {total_docs_retrieved}" + ) + state["total_records_retrieved (internet archive)"] = total_docs_retrieved + save_state(state) + + # Add and commit changes + shared.add_and_commit( + PATHS["repo"], "Added and committed Internet Archive data" + ) + + # Push changes + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/1-fetch/metmuseum_fetched.py b/scripts/1-fetch/metmuseum_fetched.py new file mode 100644 index 00000000..ede002a5 --- /dev/null +++ b/scripts/1-fetch/metmuseum_fetched.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python +""" +This file is dedicated to querying data from the MetMuseum API. +""" + +# Standard library +import argparse +import csv +import os +import sys +import traceback + +# Third-party +import requests +import yaml +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# Log the start of the script execution +LOGGER.info("Script execution started.") + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. + """ + LOGGER.info("Parsing command-line arguments") + parser = argparse.ArgumentParser( + description="MetMuseum Data Fetching Script" + ) + parser.add_argument( + "--licenses", type=int, default=1, help="Number of licenses to query" + ) + return parser.parse_args() + + +def set_up_data_file(): + """ + Sets up the data file for recording results. 
+ """ + LOGGER.info("Setting up the data file for recording results.") + header = "LICENSE TYPE,Document Count\n" + with open( + os.path.join(PATHS["data_phase"], "metmuseum_fetched.csv"), "w" + ) as f: + f.write(header) + + +def get_request_url(): + """ + Provides the API Endpoint URL for MetMuseum data. + + Returns: + string: The API Endpoint URL for the query. + """ + LOGGER.info("Providing the API Endpoint URL for MetMuseum data.") + return "https://collectionapi.metmuseum.org/public/collection/v1/objects" + + +def get_response_elems(): + """ + Retrieves the total number of documents from the MetMuseum API. + + Returns: + dict: A dictionary containing the total document count. + """ + LOGGER.info("Querying metadata from the MetMuseum API.") + try: + request_url = get_request_url() + max_retries = Retry( + total=5, + backoff_factor=10, + status_forcelist=[403, 408, 429, 500, 502, 503, 504], + ) + session = requests.Session() + session.mount("https://", HTTPAdapter(max_retries=max_retries)) + with session.get(request_url) as response: + response.raise_for_status() + search_data = response.json() + return {"totalResults": search_data.get("total", 0)} + except Exception as e: + LOGGER.error(f"Error occurred during request: {e}") + raise shared.QuantifyingException(f"Error fetching data: {e}", 1) + + +def retrieve_license_data(): + """ + Retrieves the data for the public domain license from the MetMuseum API. + + Returns: + int: The total number of documents retrieved. + """ + LOGGER.info( + "Retrieving the data for public domain license from MetMuseum." + ) + data_dict = get_response_elems() + total_docs_retrieved = int(data_dict["totalResults"]) + record_results("publicdomain/zero/1.0", data_dict) + return total_docs_retrieved + + +def record_results(license_type, data): + """ + Records the data for a specific license type into the CSV file. + + Args: + license_type: The license type. + data: A dictionary containing the data to record. + """ + LOGGER.info(f"Recording data for license: {license_type}") + row = [license_type, data["totalResults"]] + with open( + os.path.join(PATHS["data_phase"], "metmuseum_fetched.csv"), + "a", + newline="", + ) as f: + writer = csv.writer(f) + writer.writerow(row) + + +def load_state(): + """ + Loads the state from a YAML file, returns the last recorded state. + + Returns: + dict: The last recorded state. + """ + if os.path.exists(PATHS["state"]): + with open(PATHS["state"], "r") as f: + return yaml.safe_load(f) + return {"total_records_retrieved (metmuseum)": 0} + + +def save_state(state: dict): + """ + Saves the state to a YAML file. + + Args: + state: The state dictionary to save. + """ + with open(PATHS["state"], "w") as f: + yaml.safe_dump(state, f) + + +def main(): + + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + # args = parse_arguments() + + state = load_state() + total_docs_retrieved = state["total_records_retrieved (metmusuem)"] + LOGGER.info(f"Initial total_documents_retrieved: {total_docs_retrieved}") + goal_documents = 1000 # Set goal number of documents + + if total_docs_retrieved >= goal_documents: + LOGGER.info( + f"Goal of {goal_documents} documents already achieved." + " No further action required." 
+
+
+def record_results(license_type, data):
+    """
+    Records the data for a specific license type into the CSV file.
+
+    Args:
+        license_type: The license type.
+        data: A dictionary containing the data to record.
+    """
+    LOGGER.info(f"Recording data for license: {license_type}")
+    row = [license_type, data["totalResults"]]
+    with open(
+        os.path.join(PATHS["data_phase"], "metmuseum_fetched.csv"),
+        "a",
+        newline="",
+    ) as f:
+        writer = csv.writer(f)
+        writer.writerow(row)
+
+
+def load_state():
+    """
+    Loads the state from a YAML file, returns the last recorded state.
+
+    Returns:
+        dict: The last recorded state.
+    """
+    if os.path.exists(PATHS["state"]):
+        with open(PATHS["state"], "r") as f:
+            return yaml.safe_load(f)
+    return {"total_records_retrieved (metmuseum)": 0}
+
+
+def save_state(state: dict):
+    """
+    Saves the state to a YAML file.
+
+    Args:
+        state: The state dictionary to save.
+    """
+    with open(PATHS["state"], "w") as f:
+        yaml.safe_dump(state, f)
+
+
+def main():
+
+    # Fetch and merge changes
+    shared.fetch_and_merge(PATHS["repo"])
+
+    # args = parse_arguments()
+
+    state = load_state()
+    total_docs_retrieved = state["total_records_retrieved (metmuseum)"]
+    LOGGER.info(f"Initial total_documents_retrieved: {total_docs_retrieved}")
+    goal_documents = 1000  # Set goal number of documents
+
+    if total_docs_retrieved >= goal_documents:
+        LOGGER.info(
+            f"Goal of {goal_documents} documents already achieved."
+            " No further action required."
+        )
+        return
+
+    # Log the paths being used
+    shared.log_paths(LOGGER, PATHS)
+
+    # Create data directory for this phase
+    os.makedirs(PATHS["data_phase"], exist_ok=True)
+
+    if total_docs_retrieved == 0:
+        set_up_data_file()
+
+    # Retrieve and record data
+    docs_retrieved = retrieve_license_data()
+
+    # Update the state with the new count of retrieved records
+    total_docs_retrieved += docs_retrieved
+    LOGGER.info(
+        f"Total documents retrieved after fetching: {total_docs_retrieved}"
+    )
+    state["total_records_retrieved (metmuseum)"] = total_docs_retrieved
+    save_state(state)
+
+    # Add and commit changes
+    shared.add_and_commit(PATHS["repo"], "Added and committed MetMuseum data")
+
+    # Push changes
+    shared.push_changes(PATHS["repo"])
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except shared.QuantifyingException as e:
+        if e.exit_code == 0:
+            LOGGER.info(e.message)
+        else:
+            LOGGER.error(e.message)
+        sys.exit(e.exit_code)
+    except SystemExit as e:
+        LOGGER.error(f"System exit with code: {e.code}")
+        sys.exit(e.code)
+    except KeyboardInterrupt:
+        LOGGER.info("(130) Halted via KeyboardInterrupt.")
+        sys.exit(130)
+    except Exception:
+        LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}")
+        sys.exit(1)
diff --git a/scripts/1-fetch/vimeo_fetched.py b/scripts/1-fetch/vimeo_fetched.py
new file mode 100644
index 00000000..650bc1d2
--- /dev/null
+++ b/scripts/1-fetch/vimeo_fetched.py
@@ -0,0 +1,259 @@
+#!/usr/bin/env python
+"""
+This file is dedicated to querying data from the Vimeo API.
+"""
+
+# Standard library
+import argparse
+import csv
+import os
+import sys
+import traceback
+
+# Third-party
+import requests
+import yaml
+from dotenv import load_dotenv
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
+
+# Add parent directory so shared can be imported
+sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
+
+# First-party/Local
+import shared  # noqa: E402
+
+# Setup
+LOGGER, PATHS = shared.setup(__file__)
+
+# Load environment variables
+load_dotenv(PATHS["dotenv"])
+
+# Gets the Vimeo access token and client ID from the .env file
+ACCESS_TOKEN = os.getenv("VIMEO_ACCESS_TOKEN")
+CLIENT_ID = os.getenv("VIMEO_CLIENT_ID")
+
+# Log the start of the script execution
+LOGGER.info("Script execution started.")
+
+
+def parse_arguments():
+    """
+    Parses command-line arguments, returns parsed arguments.
+    """
+    LOGGER.info("Parsing command-line arguments")
+    parser = argparse.ArgumentParser(description="Vimeo Data Fetching Script")
+    parser.add_argument(
+        "--licenses", type=int, default=8, help="Number of licenses to query"
+    )
+    return parser.parse_args()
+
+
+def set_up_data_file():
+    """
+    Sets up the data file for recording results.
+    """
+    LOGGER.info("Setting up the data file for recording results.")
+    header = "LICENSE TYPE,Document Count\n"
+    with open(
+        os.path.join(PATHS["data_phase"], "vimeo_fetched.csv"), "w"
+    ) as f:
+        f.write(header)
+
+
+def get_license_list():
+    """
+    Provides the list of licenses to be searched via Vimeo API.
+
+    Returns:
+        List: A list containing all license types to be searched in Vimeo API.
+    """
+    LOGGER.info("Providing the list of licenses to be searched in Vimeo API.")
+    return [
+        "CC",
+        "CC-BY",
+        "CC-BY-NC",
+        "CC-BY-NC-ND",
+        "CC-BY-NC-SA",
+        "CC-BY-ND",
+        "CC-BY-SA",
+        "CC0",
+    ]
+
+
+def get_request_url(license="CC"):
+    """
+    Provides the API Endpoint URL for specified license combinations.
+
+    Args:
+        license: A string representing the type of license.
+
+    Returns:
+        string: A string representing the API Endpoint URL for the query.
+    """
+    LOGGER.info(
+        "Providing the API Endpoint URL for specified parameter combinations."
+    )
+    return (
+        f"https://api.vimeo.com/videos?filter={license}"
+        f"&client_id={CLIENT_ID}&access_token={ACCESS_TOKEN}"
+    )
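+
+
+# Note: placing the access token in the query string makes it easy to leak
+# via logs; Vimeo also accepts "Authorization: Bearer <token>" as a header,
+# which a future revision could use instead, e.g.:
+#   session.headers.update({"Authorization": f"Bearer {ACCESS_TOKEN}"})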
+
+
+def get_response_elems(license):
+    """
+    Provides the metadata for query of specified license type.
+
+    Args:
+        license: A string representing the type of license.
+
+    Returns:
+        dict: A dictionary containing the metadata from the API query.
+    """
+    LOGGER.info(f"Querying metadata for license: {license}")
+    try:
+        request_url = get_request_url(license=license)
+        max_retries = Retry(
+            total=5,
+            backoff_factor=10,
+            status_forcelist=[403, 408, 429, 500, 502, 503, 504],
+        )
+        session = requests.Session()
+        session.mount("https://", HTTPAdapter(max_retries=max_retries))
+        with session.get(request_url) as response:
+            response.raise_for_status()
+            search_data = response.json()
+            return {"totalResults": search_data["total"]}
+    except Exception as e:
+        LOGGER.error(f"Error occurred during request: {e}")
+        raise shared.QuantifyingException(f"Error fetching data: {e}", 1)
+
+
+def retrieve_license_data(args):
+    """
+    Retrieves the data of all license types specified.
+
+    Returns:
+        int: The total number of documents retrieved.
+    """
+    LOGGER.info("Retrieving the data for all license types from Vimeo.")
+    licenses = get_license_list()[: args.licenses]
+
+    data = []
+    total_docs_retrieved = 0
+
+    for license_type in licenses:
+        data_dict = get_response_elems(license_type)
+        total_docs_retrieved += data_dict["totalResults"]
+        record_results(license_type, data_dict)
+        data.append([license_type, data_dict["totalResults"]])
+
+    for row in data:
+        LOGGER.info(f"Collected data row: {row}")
+
+    # main() adds this to the running state, so the count (not the rows)
+    # is returned
+    return total_docs_retrieved
+
+
+def record_results(license_type, data):
+    """
+    Records the data for a specific license type into the CSV file.
+
+    Args:
+        license_type: The license type.
+        data: A dictionary containing the data to record.
+    """
+    LOGGER.info(f"Recording data for license: {license_type}")
+    row = [license_type, data["totalResults"]]
+    with open(
+        os.path.join(PATHS["data_phase"], "vimeo_fetched.csv"), "a", newline=""
+    ) as f:
+        writer = csv.writer(f)
+        writer.writerow(row)
+
+
+def load_state():
+    """
+    Loads the state from a YAML file, returns the last recorded state.
+
+    Returns:
+        dict: The last recorded state.
+    """
+    if os.path.exists(PATHS["state"]):
+        with open(PATHS["state"], "r") as f:
+            return yaml.safe_load(f)
+    return {"total_records_retrieved (vimeo)": 0}
+
+
+def save_state(state: dict):
+    """
+    Saves the state to a YAML file.
+
+    Args:
+        state: The state dictionary to save.
+    """
+    with open(PATHS["state"], "w") as f:
+        yaml.safe_dump(state, f)
+
+
+def main():
+
+    # Fetch and merge changes
+    shared.fetch_and_merge(PATHS["repo"])
+
+    args = parse_arguments()
+
+    state = load_state()
+    total_docs_retrieved = state["total_records_retrieved (vimeo)"]
+    LOGGER.info(f"Initial total_documents_retrieved: {total_docs_retrieved}")
+    goal_documents = 1000  # Set goal number of documents
+
+    if total_docs_retrieved >= goal_documents:
+        LOGGER.info(
+            f"Goal of {goal_documents} documents already achieved."
+            " No further action required."
+ ) + return + + # Log the paths being used + shared.log_paths(LOGGER, PATHS) + + # Create data directory for this phase + os.makedirs(PATHS["data_phase"], exist_ok=True) + + if total_docs_retrieved == 0: + set_up_data_file() + + # Retrieve and record data + docs_retrieved = retrieve_license_data(args) + + # Update the state with the new count of retrieved records + total_docs_retrieved += docs_retrieved + LOGGER.info( + f"Total documents retrieved after fetching: {total_docs_retrieved}" + ) + state["total_records_retrieved (vimeo)"] = total_docs_retrieved + save_state(state) + + # Add and commit changes + shared.add_and_commit(PATHS["repo"], "Added and committed Vimeo data") + + # Push changes + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/1-fetch/wikicommons_fetched.py b/scripts/1-fetch/wikicommons_fetched.py new file mode 100644 index 00000000..62900dbb --- /dev/null +++ b/scripts/1-fetch/wikicommons_fetched.py @@ -0,0 +1,302 @@ +#!/usr/bin/env python +""" +This file is dedicated to querying data from WikiCommons. +""" + +# Standard library +import argparse +import csv +import os +import sys +import traceback + +# Third-party +import requests +import yaml +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# Log the start of the script execution +LOGGER.info("Script execution started.") + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. + """ + LOGGER.info("Parsing command-line arguments") + parser = argparse.ArgumentParser( + description="WikiCommons Data Fetching Script" + ) + parser.add_argument( + "--license_alias", + type=str, + default="Free_Creative_Commons_licenses", + help="Root category for recursive license search", + ) + return parser.parse_args() + + +def set_up_data_file(): + """ + Sets up the data file for recording results. + """ + LOGGER.info("Setting up the data file for recording results.") + header = "LICENSE TYPE,File Count,Page Count\n" + with open( + os.path.join(PATHS["data_phase"], "wikicommons_fetched.csv"), "w" + ) as f: + f.write(header) + + +def get_content_request_url(license): + """ + Provides the API Endpoint URL for + specified parameters' WikiCommons contents. + + Args: + license: A string representing the type of license. + + Returns: + string: The API Endpoint URL for the + query specified by this function's parameters. + """ + LOGGER.info(f"Generating content request URL for license: {license}") + return ( + r"https://commons.wikimedia.org/w/api.php?" + r"action=query&prop=categoryinfo&titles=" + f"Category:{license}&format=json" + ) + + +def get_subcat_request_url(license): + """ + Provides the API Endpoint URL for specified parameters' + WikiCommons subcategories for recursive searching. + + Args: + license: A string representing the type of license. 
+ + Returns: + string: The API Endpoint URL for the query + specified by this function's parameters. + """ + LOGGER.info(f"Generating subcategory request URL for license: {license}") + base_url = ( + r"https://commons.wikimedia.org/w/api.php?" + r"action=query&cmtitle=" + f"Category:{license}" + r"&cmtype=subcat&list=categorymembers&format=json" + ) + return base_url + + +def get_subcategories(license, session): + """ + Obtain the subcategories of LICENSE in + WikiCommons Database for recursive searching. + + Args: + license: A string representing the type of license. + session: A requests.Session object for accessing API endpoints. + + Returns: + list: A list representing the subcategories + of current license type in WikiCommons dataset. + """ + LOGGER.info(f"Obtaining subcategories for license: {license}") + try: + request_url = get_subcat_request_url(license) + with session.get(request_url) as response: + response.raise_for_status() + search_data = response.json() + category_list = [ + members["title"].replace("Category:", "").replace("&", "%26") + for members in search_data["query"]["categorymembers"] + ] + return category_list + except Exception as e: + LOGGER.error(f"Error occurred during subcategory request: {e}") + raise shared.QuantifyingException( + f"Error fetching subcategories: {e}", 1 + ) + + +def get_license_contents(license, session): + """ + Provides the metadata for a query of specified parameters. + + Args: + license: A string representing the type of license. + session: A requests.Session object for accessing API endpoints. + + Returns: + dict: A dictionary mapping metadata + to its value provided from the API query. + """ + LOGGER.info(f"Querying content for license: {license}") + try: + request_url = get_content_request_url(license) + with session.get(request_url) as response: + response.raise_for_status() + search_data = response.json() + file_cnt = 0 + page_cnt = 0 + for id in search_data["query"]["pages"]: + lic_content = search_data["query"]["pages"][id] + file_cnt += lic_content["categoryinfo"]["files"] + page_cnt += lic_content["categoryinfo"]["pages"] + return {"total_file_cnt": file_cnt, "total_page_cnt": page_cnt} + except Exception as e: + LOGGER.error(f"Error occurred during content request: {e}") + raise shared.QuantifyingException(f"Error fetching content: {e}", 1) + + +def record_results(license_type, data): + """ + Records the data for a specific license type into the CSV file. + + Args: + license_type: The license type. + data: A dictionary containing the data to record. + """ + LOGGER.info(f"Recording data for license: {license_type}") + row = [license_type, data["total_file_cnt"], data["total_page_cnt"]] + with open( + os.path.join(PATHS["data_phase"], "wikicommons_fetched.csv"), + "a", + newline="", + ) as f: + writer = csv.writer(f) + writer.writerow(row) + + +def recur_record_all_licenses(license_alias="Free_Creative_Commons_licenses"): + """ + Recursively records the data of all license + types findable in the license list and its individual subcategories. + + Args: + license_alias: The root category alias for recursive search. 
+ """ + LOGGER.info("Starting recursive recording of license data.") + + license_cache = {} + session = requests.Session() + max_retries = Retry( + total=5, + backoff_factor=10, + status_forcelist=[403, 408, 429, 500, 502, 503, 504], + ) + session.mount("https://", HTTPAdapter(max_retries=max_retries)) + + def recursive_traversing_subroutine(alias): + alias.replace(",", "|") + cur_category = alias.split("/")[-1] + subcategories = get_subcategories(cur_category, session) + if cur_category not in license_cache: + license_content = get_license_contents(cur_category, session) + record_results(alias, license_content) + license_cache[cur_category] = True + for cats in subcategories: + recursive_traversing_subroutine(f"{alias}/{cats}") + + recursive_traversing_subroutine(license_alias) + + +def load_state(): + """ + Loads the state from a YAML file, returns the last recorded state. + """ + if os.path.exists(PATHS["state"]): + with open(PATHS["state"], "r") as f: + return yaml.safe_load(f) + return {"total_records_retrieved (wikicommons)": 0} + + +def save_state(state: dict): + """ + Saves the state to a YAML file. + + Args: + state: The state dictionary to save. + """ + with open(PATHS["state"], "w") as f: + yaml.safe_dump(state, f) + + +def main(): + + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + args = parse_arguments() + + state = load_state() + total_docs_retrieved = state["total_records_retrieved (wikicommons)"] + LOGGER.info(f"Initial total_documents_retrieved: {total_docs_retrieved}") + goal_documents = 1000 # Set goal number of documents + + if total_docs_retrieved >= goal_documents: + LOGGER.info( + f"Goal of {goal_documents} documents already achieved." + " No further action required." + ) + return + + # Log the paths being used + shared.log_paths(LOGGER, PATHS) + + # Create data directory for this phase + os.makedirs(PATHS["data_phase"], exist_ok=True) + + if total_docs_retrieved == 0: + set_up_data_file() + + # Retrieve and record data + recur_record_all_licenses(args.license_alias) + + # Update the state with the new count of retrieved records + total_docs_retrieved += 1 # Update with actual number retrieved + LOGGER.info( + f"Total documents retrieved after fetching: {total_docs_retrieved}" + ) + state["total_records_retrieved (wikicommons)"] = total_docs_retrieved + save_state(state) + + # Add and commit changes + shared.add_and_commit( + PATHS["repo"], "Added and committed WikiCommons data" + ) + + # Push changes + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/1-fetch/wikipedia_fetched.py b/scripts/1-fetch/wikipedia_fetched.py new file mode 100644 index 00000000..b7d3c6f1 --- /dev/null +++ b/scripts/1-fetch/wikipedia_fetched.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python +""" +This file is dedicated to querying data from the Wikipedia API. 
+""" + +# Standard library +import argparse +import csv +import os +import sys +import traceback + +# Third-party +import requests +import yaml +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# Log the start of the script execution +LOGGER.info("Script execution started.") + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. + """ + LOGGER.info("Parsing command-line arguments") + parser = argparse.ArgumentParser( + description="Wikipedia Data Fetching Script" + ) + parser.add_argument( + "--languages", + type=str, + nargs="+", + default=["en"], + help="List of Wikipedia language codes to query", + ) + return parser.parse_args() + + +def set_up_data_file(): + """ + Sets up the data file for recording results. + """ + LOGGER.info("Setting up the data file for recording results.") + header = ( + "language,articles,edits,images," + "users,activeusers,admins,jobqueue,views\n" + ) + with open( + os.path.join(PATHS["data_phase"], "wikipedia_fetched.csv"), "w" + ) as f: + f.write(header) + + +def get_request_url(lang="en"): + """ + Provides the API Endpoint URL for specified parameter combinations. + + Args: + lang: A string representing the language for the Wikipedia API. + + Returns: + string: The API Endpoint URL for the query. + """ + LOGGER.info(f"Generating request URL for language: {lang}") + base_url = ( + r"https://{lang}.wikipedia.org/w/api.php" + "?action=query&meta=siteinfo" + "&siprop=statistics&format=json" + ) + return base_url.format(lang=lang) + + +def get_response_elems(language="en"): + """ + Provides the metadata for query of specified parameters. + + Args: + language: A string representing the language for the Wikipedia API. + + Returns: + dict: A dictionary mapping metadata + to its value provided from the API query. + """ + LOGGER.info(f"Querying Wikipedia API for language: {language}") + try: + request_url = get_request_url(language) + max_retries = Retry( + total=5, + backoff_factor=10, + status_forcelist=[403, 408, 429, 500, 502, 503, 504], + ) + session = requests.Session() + session.mount("https://", HTTPAdapter(max_retries=max_retries)) + with session.get(request_url) as response: + response.raise_for_status() + search_data = response.json() + stats = search_data.get("query", {}).get("statistics", {}) + stats["language"] = language + return stats + except Exception as e: + LOGGER.error(f"Error occurred during API request: {e}") + raise shared.QuantifyingException(f"Error fetching data: {e}", 1) + + +def record_results(stats): + """ + Records the data for a specific language into the CSV file. + + Args: + stats: A dictionary of Wikipedia statistics. + """ + LOGGER.info(f"Recording data for language: {stats.get('language')}") + row = [ + stats.get("language", ""), + stats.get("articles", 0), + stats.get("edits", 0), + stats.get("images", 0), + stats.get("users", 0), + stats.get("activeusers", 0), + stats.get("admins", 0), + stats.get("jobqueue", 0), + stats.get("views", 0), + ] + with open( + os.path.join(PATHS["data_phase"], "wikipedia_fetched.csv"), + "a", + newline="", + ) as f: + writer = csv.writer(f) + writer.writerow(row) + + +def retrieve_and_record_data(args): + """ + Retrieves and records the data for all specified languages. 
+ """ + LOGGER.info("Starting data retrieval and recording.") + total_records_retrieved = 0 + + for lang in args.languages: + stats = get_response_elems(lang) + if stats: + record_results(stats) + total_records_retrieved += 1 + + return total_records_retrieved + + +def load_state(): + """ + Loads the state from a YAML file, returns the last recorded state. + """ + if os.path.exists(PATHS["state"]): + with open(PATHS["state"], "r") as f: + return yaml.safe_load(f) + return {"total_records_retrieved (wikipedia)": 0} + + +def save_state(state: dict): + """ + Saves the state to a YAML file. + + Args: + state: The state dictionary to save. + """ + with open(PATHS["state"], "w") as f: + yaml.safe_dump(state, f) + + +def main(): + + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + args = parse_arguments() + + state = load_state() + total_records_retrieved = state["total_records_retrieved (wikipedia)"] + LOGGER.info(f"Initial total_records_retrieved: {total_records_retrieved}") + goal_records = 1000 # Set goal number of records + + if total_records_retrieved >= goal_records: + LOGGER.info( + f"Goal of {goal_records} records already achieved." + " No further action required." + ) + return + + # Log the paths being used + shared.log_paths(LOGGER, PATHS) + + # Create data directory for this phase + os.makedirs(PATHS["data_phase"], exist_ok=True) + + if total_records_retrieved == 0: + set_up_data_file() + + # Retrieve and record data + records_retrieved = retrieve_and_record_data(args) + + # Update the state with the new count of retrieved records + total_records_retrieved += records_retrieved + LOGGER.info( + f"Total records retrieved after fetching: {total_records_retrieved}" + ) + state["total_records_retrieved (wikipedia)"] = total_records_retrieved + save_state(state) + + # Add and commit changes + shared.add_and_commit(PATHS["repo"], "Added and committed Wikipedia data") + + # Push changes + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/1-fetch/youtube_fetched.py b/scripts/1-fetch/youtube_fetched.py new file mode 100644 index 00000000..0a2069ff --- /dev/null +++ b/scripts/1-fetch/youtube_fetched.py @@ -0,0 +1,282 @@ +#!/usr/bin/env python +""" +This file is dedicated to querying data from the YouTube API. +""" + +# Standard library +import argparse +import csv +import datetime +import os +import sys +import traceback + +# Third-party +import requests +import yaml +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# Load environment variables +API_KEY = os.getenv("YOUTUBE_API_KEY") + +# Log the start of the script execution +LOGGER.info("Script execution started.") + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. 
+ """ + LOGGER.info("Parsing command-line arguments") + parser = argparse.ArgumentParser( + description="YouTube Data Fetching Script" + ) + parser.add_argument( + "--license_type", + type=str, + default="licenses/by/3.0", + help="License type to query", + ) + return parser.parse_args() + + +def set_up_data_file(): + """ + Sets up the data file for recording results. + """ + LOGGER.info("Setting up the data file for recording results.") + header = "LICENSE TYPE,Time,Document Count\n" + with open( + os.path.join(PATHS["data_phase"], "youtube_fetched.csv"), "w" + ) as f: + f.write(header) + + +def get_next_time_search_interval(): + """ + Provides the next searching interval of time + for Creative Commons licensed video. + + Yields: + tuple: A tuple representing the time search interval currently + dealt via 2 RFC 3339 formatted date-time values (by YouTube + API Standards), and current starting year/month of the interval. + """ + LOGGER.info("Generating time intervals for search.") + datetime_today = datetime.datetime.today() + cur_year, cur_month = 2009, 1 + while ( + cur_year * 100 + cur_month + <= datetime_today.year * 100 + datetime_today.month + ): + end_month, end_day = 12, 31 + if cur_month == 1: + end_month, end_day = 2, 28 + int(cur_year % 4 == 0) + elif cur_month == 3: + end_month, end_day = 4, 30 + elif cur_month == 5: + end_month, end_day = 6, 30 + elif cur_month == 7: + end_month, end_day = 8, 31 + elif cur_month == 9: + end_month, end_day = 10, 31 + elif cur_month == 11: + end_month, end_day = 12, 31 + yield ( + f"{cur_year}-{cur_month:02d}-01T00:00:00Z", + f"{cur_year}-{end_month:02d}-{end_day:02d}T23:59:59Z", + cur_year, + cur_month, + ) + cur_month += 2 + if cur_month > 12: + cur_month = 1 + cur_year += 1 + + +def get_request_url(time=None): + """ + Provides the API Endpoint URL for specified parameter combinations. + + Args: + time: A tuple indicating the time interval for the query. + + Returns: + string: The API Endpoint URL for the query. + """ + LOGGER.info("Generating request URL for time interval.") + base_url = ( + r"https://youtube.googleapis.com/youtube/v3/search?" + "part=snippet&type=video&videoLicense=creativeCommon" + ) + if time is not None: + base_url += f"&publishedAfter={time[0]}&publishedBefore={time[1]}" + return f"{base_url}&key={API_KEY}" + + +def get_response_elems(time=None): + """ + Provides the metadata for query of specified parameters. + + Args: + time: A tuple indicating the time interval for the query. + + Returns: + dict: A dictionary mapping metadata to + its value provided from the API query. + """ + LOGGER.info(f"Querying YouTube API for time interval: {time[2]}-{time[3]}") + try: + request_url = get_request_url(time=time) + max_retries = Retry( + total=5, + backoff_factor=10, + status_forcelist=[403, 408, 429, 500, 502, 503, 504], + ) + session = requests.Session() + session.mount("https://", HTTPAdapter(max_retries=max_retries)) + with session.get(request_url) as response: + response.raise_for_status() + search_data = response.json() + return search_data.get("pageInfo", {}).get("totalResults", 0) + except Exception as e: + LOGGER.error(f"Error occurred during API request: {e}") + raise shared.QuantifyingException(f"Error fetching data: {e}", 1) + + +def record_results(license_type, time, document_count): + """ + Records the data for a specific license type + and time interval into the CSV file. + + Args: + license_type: The license type. + time: The time interval. + document_count: The number of documents. 
+ """ + LOGGER.info( + f"Recording data for license: {license_type}," + "time: {time}, count: {document_count}" + ) + row = [license_type, time, document_count] + with open( + os.path.join(PATHS["data_phase"], "youtube_fetched.csv"), + "a", + newline="", + ) as f: + writer = csv.writer(f) + writer.writerow(row) + + +def retrieve_and_record_data(args): + """ + Retrieves and records the data for all license types and time intervals. + """ + LOGGER.info("Starting data retrieval and recording.") + total_documents_retrieved = 0 + + for time in get_next_time_search_interval(): + document_count = get_response_elems(time=time) + record_results( + args.license_type, f"{time[2]}-{time[3]}", document_count + ) + total_documents_retrieved += document_count + + return total_documents_retrieved + + +def load_state(): + """ + Loads the state from a YAML file, returns the last recorded state. + """ + if os.path.exists(PATHS["state"]): + with open(PATHS["state"], "r") as f: + return yaml.safe_load(f) + return {"total_records_retrieved (youtube)": 0} + + +def save_state(state: dict): + """ + Saves the state to a YAML file. + + Args: + state: The state dictionary to save. + """ + with open(PATHS["state"], "w") as f: + yaml.safe_dump(state, f) + + +def main(): + + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + args = parse_arguments() + + state = load_state() + total_docs_retrieved = state["total_records_retrieved (youtube)"] + LOGGER.info(f"Initial total_documents_retrieved: {total_docs_retrieved}") + goal_documents = 1000 # Set goal number of documents + + if total_docs_retrieved >= goal_documents: + LOGGER.info( + f"Goal of {goal_documents} documents already achieved." + " No further action required." + ) + return + + # Log the paths being used + shared.log_paths(LOGGER, PATHS) + + # Create data directory for this phase + os.makedirs(PATHS["data_phase"], exist_ok=True) + + if total_docs_retrieved == 0: + set_up_data_file() + + # Retrieve and record data + docs_retrieved = retrieve_and_record_data(args) + + # Update the state with the new count of retrieved records + total_docs_retrieved += docs_retrieved + LOGGER.info( + f"Total documents retrieved after fetching: {total_docs_retrieved}" + ) + state["total_records_retrieved (youtube)"] = total_docs_retrieved + save_state(state) + + # Add and commit changes + shared.add_and_commit(PATHS["repo"], "Added and committed YouTube data") + + # Push changes + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/2-process/deviantart_processed.py b/scripts/2-process/deviantart_processed.py new file mode 100644 index 00000000..170bd14d --- /dev/null +++ b/scripts/2-process/deviantart_processed.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +""" +This file is dedicated to processing Deviantart data +for analysis and comparison between quarters. 
+""" +# Standard library +import os +import sys +import traceback + +# import pandas as pd + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# def load_quarter_data(quarter): +# """ +# Load data for a specific quarter. +# """ +# file_path = os.path.join(PATHS["data"], f"{quarter}", +# "1-fetch", "deviantart_fetched", "license_total.csv") +# if not os.path.exists(file_path): +# LOGGER.error(f"Data file for quarter {quarter} not found.") +# return None +# return pd.read_csv(file_path) + + +# def compare_data(current_quarter, previous_quarter): +# """ +# Compare data between two quarters. +# """ +# current_data = load_quarter_data(current_quarter) +# previous_data = load_quarter_data(previous_quarter) + +# if current_data is None or previous_data is None: +# return + +# Process data to compare totals + + +# def parse_arguments(): +# """ +# Parses command-line arguments, returns parsed arguments. +# """ +# LOGGER.info("Parsing command-line arguments") +# parser = argparse.ArgumentParser( +# description="Google Custom Search Comparison Report") +# parser.add_argument( +# "--current_quarter", type=str, required=True, +# help="Current quarter for comparison (e.g., 2024Q3)" +# ) +# parser.add_argument( +# "--previous_quarter", type=str, required=True, +# help="Previous quarter for comparison (e.g., 2024Q2)" +# ) +# return parser.parse_args() + + +def main(): + raise shared.QuantifyingException("No current code for Phase 2", 0) + + # # Fetch and merge changes + # shared.fetch_and_merge(PATHS["repo"]) + + # # Add and commit changes + # shared.add_and_commit(PATHS["repo"], "Fetched and updated new data") + + # # Push changes + # shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/2-process/flickr_processed.py b/scripts/2-process/flickr_processed.py new file mode 100644 index 00000000..5d7cd1aa --- /dev/null +++ b/scripts/2-process/flickr_processed.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +""" +This file is dedicated to processing Flickr data +for analysis and comparison between quarters. +""" +# Standard library +import os +import sys +import traceback + +# import pandas as pd + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# def load_quarter_data(quarter): +# """ +# Load data for a specific quarter. +# """ +# file_path = os.path.join(PATHS["data"], f"{quarter}", +# "1-fetch", "flickr_fetched", "license_total.csv") +# if not os.path.exists(file_path): +# LOGGER.error(f"Data file for quarter {quarter} not found.") +# return None +# return pd.read_csv(file_path) + + +# def compare_data(current_quarter, previous_quarter): +# """ +# Compare data between two quarters. 
+# """ +# current_data = load_quarter_data(current_quarter) +# previous_data = load_quarter_data(previous_quarter) + +# if current_data is None or previous_data is None: +# return + +# Process data to compare totals + + +# def parse_arguments(): +# """ +# Parses command-line arguments, returns parsed arguments. +# """ +# LOGGER.info("Parsing command-line arguments") +# parser = argparse.ArgumentParser( +# description="Google Custom Search Comparison Report") +# parser.add_argument( +# "--current_quarter", type=str, required=True, +# help="Current quarter for comparison (e.g., 2024Q3)" +# ) +# parser.add_argument( +# "--previous_quarter", type=str, required=True, +# help="Previous quarter for comparison (e.g., 2024Q2)" +# ) +# return parser.parse_args() + + +def main(): + raise shared.QuantifyingException("No current code for Phase 2", 0) + + # # Fetch and merge changes + # shared.fetch_and_merge(PATHS["repo"]) + + # # Add and commit changes + # shared.add_and_commit(PATHS["repo"], "Fetched and updated new data") + + # # Push changes + # shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/2-process/gcs_processed.py b/scripts/2-process/gcs_processed.py new file mode 100755 index 00000000..a8a64bae --- /dev/null +++ b/scripts/2-process/gcs_processed.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python +""" +This file is dedicated to processing Google Custom Search data +for analysis and comparison between quarters. +""" +# Standard library +import os +import sys +import traceback + +# import pandas as pd + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# def load_quarter_data(quarter): +# """ +# Load data for a specific quarter. +# """ +# file_path = os.path.join(PATHS["data"], f"{quarter}", +# "1-fetch", "gcs_fetched.csv") +# if not os.path.exists(file_path): +# LOGGER.error(f"Data file for quarter {quarter} not found.") +# return None +# return pd.read_csv(file_path) + + +# def compare_data(current_quarter, previous_quarter): +# """ +# Compare data between two quarters. +# """ +# current_data = load_quarter_data(current_quarter) +# previous_data = load_quarter_data(previous_quarter) + +# if current_data is None or previous_data is None: +# return + +# # Process the data to compare by country +# compare_by_country(current_data, previous_data, +# current_quarter, previous_quarter) + +# # Process the data to compare by license +# compare_by_license(current_data, previous_data, +# current_quarter, previous_quarter) + +# # Process the data to compare by language +# compare_by_language(current_data, previous_data, +# current_quarter, previous_quarter) + + +# def compare_by_country(current_data, previous_data, +# current_quarter, previous_quarter): +# """ +# Compare the number of webpages licensed by country between two quarters. 
+# """ +# LOGGER.info(f"Comparing data by country between +# {current_quarter} and {previous_quarter}.") + +# # Get the list of country columns dynamically +# columns = [col.strip() for col in current_data.columns.tolist()] +# start_index = columns.index("United States") +# end_index = columns.index("Japan") + 1 + +# countries = columns[start_index:end_index] + +# current_country_data = current_data[countries].sum() +# previous_country_data = previous_data[countries].sum() + +# comparison = pd.DataFrame({ +# 'Country': countries, +# f'{current_quarter}': current_country_data.values, +# f'{previous_quarter}': previous_country_data.values, +# 'Difference': current_country_data.values +# - previous_country_data.values +# }) + +# LOGGER.info(f"Country comparison:\n{comparison}") + +# # Visualization code to be added here + + +# def compare_by_license(current_data, previous_data, +# current_quarter, previous_quarter): +# """ +# Compare the number of webpages licensed by license type +# between two quarters. +# """ +# LOGGER.info(f"Comparing data by license type +# between {current_quarter} and {previous_quarter}.") + +# current_license_data = +# current_data.groupby('LICENSE TYPE').sum().sum(axis=1) +# previous_license_data = +# previous_data.groupby('LICENSE TYPE').sum().sum(axis=1) + +# comparison = pd.DataFrame({ +# 'License Type': current_license_data.index, +# f'{current_quarter}': current_license_data.values, +# f'{previous_quarter}': previous_license_data.values, +# 'Difference': current_license_data.values +# - previous_license_data.values +# }) + +# LOGGER.info(f"License type comparison:\n{comparison}") + +# # Visualization code to be added here + + +# def compare_by_language(current_data, previous_data, +# current_quarter, previous_quarter): +# """ +# Compare the number of webpages licensed by language between two quarters. +# """ +# LOGGER.info(f"Comparing data by language between +# {current_quarter} and {previous_quarter}.") + +# # Get the list of language columns dynamically +# columns = [col.strip() for col in current_data.columns.tolist()] +# start_index = columns.index("English") +# languages = columns[start_index:] + +# current_language_data = current_data[languages].sum() +# previous_language_data = previous_data[languages].sum() + +# comparison = pd.DataFrame({ +# 'Language': languages, +# f'{current_quarter}': current_language_data.values, +# f'{previous_quarter}': previous_language_data.values, +# 'Difference': current_language_data.values +# - previous_language_data.values +# }) + +# LOGGER.info(f"Language comparison:\n{comparison}") + + +# def parse_arguments(): +# """ +# Parses command-line arguments, returns parsed arguments. 
+# """ +# LOGGER.info("Parsing command-line arguments") +# parser = argparse.ArgumentParser( +# description="Google Custom Search Comparison Report") +# parser.add_argument( +# "--current_quarter", type=str, required=True, +# help="Current quarter for comparison (e.g., 2024Q3)" +# ) +# parser.add_argument( +# "--previous_quarter", type=str, required=True, +# help="Previous quarter for comparison (e.g., 2024Q2)" +# ) +# return parser.parse_args() + + +def main(): + raise shared.QuantifyingException("No current code for Phase 2", 0) + + # # Fetch and merge changes + # shared.fetch_and_merge(PATHS["repo"]) + + # # Add and commit changes + # shared.add_and_commit(PATHS["repo"], "Fetched and updated new data") + + # # Push changes + # shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.exit_code}") + sys.exit(e.exit_code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/2-process/github_processed.py b/scripts/2-process/github_processed.py new file mode 100644 index 00000000..6fcac75e --- /dev/null +++ b/scripts/2-process/github_processed.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +""" +This file is dedicated to processing Github data +for analysis and comparison between quarters. +""" +# Standard library +import os +import sys +import traceback + +# import pandas as pd + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# def load_quarter_data(quarter): +# """ +# Load data for a specific quarter. +# """ +# file_path = os.path.join(PATHS["data"], f"{quarter}", +# "1-fetch", "github_fetched") +# if not os.path.exists(file_path): +# LOGGER.error(f"Data file for quarter {quarter} not found.") +# return None +# return pd.read_csv(file_path) + + +# def compare_data(current_quarter, previous_quarter): +# """ +# Compare data between two quarters. +# """ +# current_data = load_quarter_data(current_quarter) +# previous_data = load_quarter_data(previous_quarter) + +# if current_data is None or previous_data is None: +# return + +# Process data to compare totals + + +# def parse_arguments(): +# """ +# Parses command-line arguments, returns parsed arguments. 
+# """ +# LOGGER.info("Parsing command-line arguments") +# parser = argparse.ArgumentParser( +# description="Google Custom Search Comparison Report") +# parser.add_argument( +# "--current_quarter", type=str, required=True, +# help="Current quarter for comparison (e.g., 2024Q3)" +# ) +# parser.add_argument( +# "--previous_quarter", type=str, required=True, +# help="Previous quarter for comparison (e.g., 2024Q2)" +# ) +# return parser.parse_args() + + +def main(): + raise shared.QuantifyingException("No current code for Phase 2", 0) + + # # Fetch and merge changes + # shared.fetch_and_merge(PATHS["repo"]) + + # # Add and commit changes + # shared.add_and_commit(PATHS["repo"], "Fetched and updated new data") + + # # Push changes + # shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/2-process/internetarchive_processed.py b/scripts/2-process/internetarchive_processed.py new file mode 100644 index 00000000..f6710252 --- /dev/null +++ b/scripts/2-process/internetarchive_processed.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +""" +This file is dedicated to processing Internet Archive data +for analysis and comparison between quarters. +""" +# Standard library +import os +import sys +import traceback + +# import pandas as pd + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# def load_quarter_data(quarter): +# """ +# Load data for a specific quarter. +# """ +# file_path = os.path.join(PATHS["data"], f"{quarter}", +# "1-fetch", "internetarchive_fetched") +# if not os.path.exists(file_path): +# LOGGER.error(f"Data file for quarter {quarter} not found.") +# return None +# return pd.read_csv(file_path) + + +# def compare_data(current_quarter, previous_quarter): +# """ +# Compare data between two quarters. +# """ +# current_data = load_quarter_data(current_quarter) +# previous_data = load_quarter_data(previous_quarter) + +# if current_data is None or previous_data is None: +# return + +# Process data to compare totals + + +# def parse_arguments(): +# """ +# Parses command-line arguments, returns parsed arguments. 
+# """ +# LOGGER.info("Parsing command-line arguments") +# parser = argparse.ArgumentParser( +# description="Google Custom Search Comparison Report") +# parser.add_argument( +# "--current_quarter", type=str, required=True, +# help="Current quarter for comparison (e.g., 2024Q3)" +# ) +# parser.add_argument( +# "--previous_quarter", type=str, required=True, +# help="Previous quarter for comparison (e.g., 2024Q2)" +# ) +# return parser.parse_args() + + +def main(): + raise shared.QuantifyingException("No current code for Phase 2", 0) + + # # Fetch and merge changes + # shared.fetch_and_merge(PATHS["repo"]) + + # # Add and commit changes + # shared.add_and_commit(PATHS["repo"], "Fetched and updated new data") + + # # Push changes + # shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/2-process/metmuseum_processed.py b/scripts/2-process/metmuseum_processed.py new file mode 100644 index 00000000..4778c2d4 --- /dev/null +++ b/scripts/2-process/metmuseum_processed.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +""" +This file is dedicated to processing Met Museum data +for analysis and comparison between quarters. +""" +# Standard library +import os +import sys +import traceback + +# import pandas as pd + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# def load_quarter_data(quarter): +# """ +# Load data for a specific quarter. +# """ +# file_path = os.path.join(PATHS["data"], f"{quarter}", +# "1-fetch", "metmuseum_fetched") +# if not os.path.exists(file_path): +# LOGGER.error(f"Data file for quarter {quarter} not found.") +# return Nones +# return pd.read_csv(file_path) + + +# def compare_data(current_quarter, previous_quarter): +# """ +# Compare data between two quarters. +# """ +# current_data = load_quarter_data(current_quarter) +# previous_data = load_quarter_data(previous_quarter) + +# if current_data is None or previous_data is None: +# return + +# Process data to compare totals + + +# def parse_arguments(): +# """ +# Parses command-line arguments, returns parsed arguments. 
+# """ +# LOGGER.info("Parsing command-line arguments") +# parser = argparse.ArgumentParser( +# description="Google Custom Search Comparison Report") +# parser.add_argument( +# "--current_quarter", type=str, required=True, +# help="Current quarter for comparison (e.g., 2024Q3)" +# ) +# parser.add_argument( +# "--previous_quarter", type=str, required=True, +# help="Previous quarter for comparison (e.g., 2024Q2)" +# ) +# return parser.parse_args() + + +def main(): + raise shared.QuantifyingException("No current code for Phase 2", 0) + + # # Fetch and merge changes + # shared.fetch_and_merge(PATHS["repo"]) + + # # Add and commit changes + # shared.add_and_commit(PATHS["repo"], "Fetched and updated new data") + + # # Push changes + # shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/2-process/vimeo_processed.py b/scripts/2-process/vimeo_processed.py new file mode 100644 index 00000000..aaa11bc8 --- /dev/null +++ b/scripts/2-process/vimeo_processed.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +""" +This file is dedicated to processing Vimeo data +for analysis and comparison between quarters. +""" +# Standard library +import os +import sys +import traceback + +# import pandas as pd + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# def load_quarter_data(quarter): +# """ +# Load data for a specific quarter. +# """ +# file_path = os.path.join(PATHS["data"], f"{quarter}", +# "1-fetch", "vimeo_fetched",) +# if not os.path.exists(file_path): +# LOGGER.error(f"Data file for quarter {quarter} not found.") +# return None +# return pd.read_csv(file_path) + + +# def compare_data(current_quarter, previous_quarter): +# """ +# Compare data between two quarters. +# """ +# current_data = load_quarter_data(current_quarter) +# previous_data = load_quarter_data(previous_quarter) + +# if current_data is None or previous_data is None: +# return + +# Process data to compare totals + + +# def parse_arguments(): +# """ +# Parses command-line arguments, returns parsed arguments. 
+# """ +# LOGGER.info("Parsing command-line arguments") +# parser = argparse.ArgumentParser( +# description="Google Custom Search Comparison Report") +# parser.add_argument( +# "--current_quarter", type=str, required=True, +# help="Current quarter for comparison (e.g., 2024Q3)" +# ) +# parser.add_argument( +# "--previous_quarter", type=str, required=True, +# help="Previous quarter for comparison (e.g., 2024Q2)" +# ) +# return parser.parse_args() + + +def main(): + raise shared.QuantifyingException("No current code for Phase 2", 0) + + # # Fetch and merge changes + # shared.fetch_and_merge(PATHS["repo"]) + + # # Add and commit changes + # shared.add_and_commit(PATHS["repo"], "Fetched and updated new data") + + # # Push changes + # shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/2-process/wikicommons_processed.py b/scripts/2-process/wikicommons_processed.py new file mode 100644 index 00000000..e1207d26 --- /dev/null +++ b/scripts/2-process/wikicommons_processed.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +""" +This file is dedicated to processing Wikicommons data +for analysis and comparison between quarters. +""" +# Standard library +import os +import sys +import traceback + +# import pandas as pd + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# def load_quarter_data(quarter): +# """ +# Load data for a specific quarter. +# """ +# file_path = os.path.join(PATHS["data"], f"{quarter}", +# "1-fetch", "wikicommons_fetched") +# if not os.path.exists(file_path): +# LOGGER.error(f"Data file for quarter {quarter} not found.") +# return None +# return pd.read_csv(file_path) + + +# def compare_data(current_quarter, previous_quarter): +# """ +# Compare data between two quarters. +# """ +# current_data = load_quarter_data(current_quarter) +# previous_data = load_quarter_data(previous_quarter) + +# if current_data is None or previous_data is None: +# return + +# Process data to compare totals + + +# def parse_arguments(): +# """ +# Parses command-line arguments, returns parsed arguments. 
+# """ +# LOGGER.info("Parsing command-line arguments") +# parser = argparse.ArgumentParser( +# description="Google Custom Search Comparison Report") +# parser.add_argument( +# "--current_quarter", type=str, required=True, +# help="Current quarter for comparison (e.g., 2024Q3)" +# ) +# parser.add_argument( +# "--previous_quarter", type=str, required=True, +# help="Previous quarter for comparison (e.g., 2024Q2)" +# ) +# return parser.parse_args() + + +def main(): + raise shared.QuantifyingException("No current code for Phase 2", 0) + + # # Fetch and merge changes + # shared.fetch_and_merge(PATHS["repo"]) + + # # Add and commit changes + # shared.add_and_commit(PATHS["repo"], "Fetched and updated new data") + + # # Push changes + # shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/2-process/wikipedia_processed.py b/scripts/2-process/wikipedia_processed.py new file mode 100644 index 00000000..4f6b534a --- /dev/null +++ b/scripts/2-process/wikipedia_processed.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +""" +This file is dedicated to processing Wikipedia data +for analysis and comparison between quarters. +""" +# Standard library +import os +import sys +import traceback + +# import pandas as pd + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# def load_quarter_data(quarter): +# """ +# Load data for a specific quarter. +# """ +# file_path = os.path.join(PATHS["data"], f"{quarter}", +# "1-fetch", "wikipedia_fetched") +# if not os.path.exists(file_path): +# LOGGER.error(f"Data file for quarter {quarter} not found.") +# return None +# return pd.read_csv(file_path) + + +# def compare_data(current_quarter, previous_quarter): +# """ +# Compare data between two quarters. +# """ +# current_data = load_quarter_data(current_quarter) +# previous_data = load_quarter_data(previous_quarter) + +# if current_data is None or previous_data is None: +# return + +# Process data to compare totals + + +# def parse_arguments(): +# """ +# Parses command-line arguments, returns parsed arguments. 
+# """ +# LOGGER.info("Parsing command-line arguments") +# parser = argparse.ArgumentParser( +# description="Google Custom Search Comparison Report") +# parser.add_argument( +# "--current_quarter", type=str, required=True, +# help="Current quarter for comparison (e.g., 2024Q3)" +# ) +# parser.add_argument( +# "--previous_quarter", type=str, required=True, +# help="Previous quarter for comparison (e.g., 2024Q2)" +# ) +# return parser.parse_args() + + +def main(): + raise shared.QuantifyingException("No current code for Phase 2", 0) + + # # Fetch and merge changes + # shared.fetch_and_merge(PATHS["repo"]) + + # # Add and commit changes + # shared.add_and_commit(PATHS["repo"], "Fetched and updated new data") + + # # Push changes + # shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/2-process/youtube_processed.py b/scripts/2-process/youtube_processed.py new file mode 100644 index 00000000..28c75c24 --- /dev/null +++ b/scripts/2-process/youtube_processed.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +""" +This file is dedicated to processing Youtube data +for analysis and comparison between quarters. +""" +# Standard library +import os +import sys +import traceback + +# import pandas as pd + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + +# def load_quarter_data(quarter): +# """ +# Load data for a specific quarter. +# """ +# file_path = os.path.join(PATHS["data"], f"{quarter}", +# "1-fetch", "youtube_fetched") +# if not os.path.exists(file_path): +# LOGGER.error(f"Data file for quarter {quarter} not found.") +# return None +# return pd.read_csv(file_path) + + +# def compare_data(current_quarter, previous_quarter): +# """ +# Compare data between two quarters. +# """ +# current_data = load_quarter_data(current_quarter) +# previous_data = load_quarter_data(previous_quarter) + +# if current_data is None or previous_data is None: +# return + +# Process data to compare totals + + +# def parse_arguments(): +# """ +# Parses command-line arguments, returns parsed arguments. 
+# """ +# LOGGER.info("Parsing command-line arguments") +# parser = argparse.ArgumentParser( +# description="Google Custom Search Comparison Report") +# parser.add_argument( +# "--current_quarter", type=str, required=True, +# help="Current quarter for comparison (e.g., 2024Q3)" +# ) +# parser.add_argument( +# "--previous_quarter", type=str, required=True, +# help="Previous quarter for comparison (e.g., 2024Q2)" +# ) +# return parser.parse_args() + + +def main(): + raise shared.QuantifyingException("No current code for Phase 2", 0) + + # # Fetch and merge changes + # shared.fetch_and_merge(PATHS["repo"]) + + # # Add and commit changes + # shared.add_and_commit(PATHS["repo"], "Fetched and updated new data") + + # # Push changes + # shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/3-report/deviantart_reports.py b/scripts/3-report/deviantart_reports.py new file mode 100644 index 00000000..8dfbd262 --- /dev/null +++ b/scripts/3-report/deviantart_reports.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python +""" +This file is dedicated to visualizing and analyzing the data collected +from Deviantart. +""" +# Standard library +import argparse +import os +import sys +import traceback +from datetime import datetime, timezone + +# Third-party +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +from pandas import PeriodIndex + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. + """ + LOGGER.info("Parsing command-line arguments") + + # Taken from shared module, fix later + datetime_today = datetime.now(timezone.utc) + quarter = PeriodIndex([datetime_today.date()], freq="Q")[0] + + parser = argparse.ArgumentParser(description="Deviantart Reports") + parser.add_argument( + "--quarter", + "-q", + type=str, + required=False, + default=f"{quarter}", + help="Data quarter in format YYYYQx, e.g., 2024Q2", + ) + parser.add_argument( + "--skip-commit", + action="store_true", + help="Don't git commit changes (also skips git push changes)", + ) + parser.add_argument( + "--skip-push", + action="store_true", + help="Don't git push changes", + ) + parser.add_argument( + "--show-plots", + action="store_true", + help="Show generated plots (in addition to saving them)", + ) + args = parser.parse_args() + if args.skip_commit: + args.skip_push = True + return args + + +def load_data(args): + """ + Load the collected data from the CSV file. 
+ """ + selected_quarter = args.quarter + + file_path = os.path.join( + PATHS["data"], + f"{selected_quarter}", + "1-fetch", + "deviantart_fetched.csv", + ) + + if not os.path.exists(file_path): + LOGGER.error(f"Data file not found: {file_path}") + return pd.DataFrame() + + data = pd.read_csv(file_path) + LOGGER.info(f"Data loaded from {file_path}") + return data + + +def visualize_by_license_type(data, args): + """ + Create a bar chart for the number of repositories licensed by license type. + """ + LOGGER.info( + "Creating a bar chart for the number of documents by license type." + ) + + selected_quarter = args.quarter + + # Strip any leading/trailing spaces from the columns + data.columns = data.columns.str.strip() + + plt.figure(figsize=(12, 8)) + ax = sns.barplot(x=data["LICENSE TYPE"], y=data["Document Count"]) + plt.title("Number of DeviantArt Documents by License Type") + plt.xlabel("License Type") + plt.ylabel("Document Count") + plt.xticks(rotation=45, ha="right") + + # Add value numbers to the top of each bar + for p in ax.patches: + ax.annotate( + format(p.get_height(), ",.0f"), + (p.get_x() + p.get_width() / 2.0, p.get_height()), + ha="center", + va="center", + xytext=(0, 9), + textcoords="offset points", + ) + + output_directory = os.path.join( + PATHS["data"], f"{selected_quarter}", "3-report" + ) + + LOGGER.info(f"Output directory: {output_directory}") + + os.makedirs(output_directory, exist_ok=True) + image_path = os.path.join( + output_directory, "deviantart_license_report.png" + ) + plt.savefig(image_path) + + if args.show_plots: + plt.show() + + shared.update_readme( + PATHS, + image_path, + "DeviantArt", + "Number of DeviantArt Documents by License Type", + "License Type Report", + args, + ) + + LOGGER.info("Visualization by license type created.") + + +def main(): + + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + args = parse_arguments() + + data = load_data(args) + if data.empty: + return + + current_directory = os.getcwd() + LOGGER.info(f"Current working directory: {current_directory}") + + visualize_by_license_type(data, args) + + # Add and commit changes + if not args.skip_commit: + shared.add_and_commit( + PATHS["repo"], "Added and committed new Deviantart reports" + ) + + # Push changes + if not args.skip_push: + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/3-report/flickr_reports.py b/scripts/3-report/flickr_reports.py new file mode 100644 index 00000000..2c96aae3 --- /dev/null +++ b/scripts/3-report/flickr_reports.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python +""" +This file is dedicated to visualizing the data collected for Flickr. 
+""" +# Standard library +import argparse +import os +import sys +import traceback +from datetime import datetime, timezone + +# Third-party +# import matplotlib.pyplot as plt +# import matplotlib.ticker as ticker +import pandas as pd + +# import seaborn as sns +from pandas import PeriodIndex + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. + """ + LOGGER.info("Parsing command-line arguments") + + # Taken from shared module, fix later + datetime_today = datetime.now(timezone.utc) + quarter = PeriodIndex([datetime_today.date()], freq="Q")[0] + + parser = argparse.ArgumentParser(description="Flickr Report") + parser.add_argument( + "--quarter", + "-q", + type=str, + required=False, + default=f"{quarter}", + help="Data quarter in format YYYYQx, e.g., 2024Q2", + ) + parser.add_argument( + "--skip-commit", + action="store_true", + help="Don't git commit changes (also skips git push changes)", + ) + parser.add_argument( + "--skip-push", + action="store_true", + help="Don't git push changes", + ) + parser.add_argument( + "--show-plots", + action="store_true", + help="Show generated plots (in addition to saving them)", + ) + args = parser.parse_args() + if args.skip_commit: + args.skip_push = True + return args + + +def load_data(args): + """ + Load the collected data from the CSV file. + """ + selected_quarter = args.quarter + + file_path = os.path.join( + PATHS["data"], + f"{selected_quarter}", + "1-fetch", + "flickr_fetched", + "final.csv", + ) + + if not os.path.exists(file_path): + LOGGER.error(f"Data file not found: {file_path}") + return pd.DataFrame() + + data = pd.read_csv(file_path) + LOGGER.info(f"Data loaded from {file_path}") + return data + + +# Add functions for individual license graphs + word clouds + total license + + +def main(): + + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + args = parse_arguments() + + data = load_data(args) + if data.empty: + return + + current_directory = os.getcwd() + LOGGER.info(f"Current working directory: {current_directory}") + + """ + Insert functions for Flickr + """ + + # Add and commit changes + if not args.skip_commit: + shared.add_and_commit( + PATHS["repo"], "Added and committed new GitHub reports" + ) + + # Push changes + if not args.skip_push: + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/3-report/gcs_reports.py b/scripts/3-report/gcs_reports.py new file mode 100755 index 00000000..a83363d6 --- /dev/null +++ b/scripts/3-report/gcs_reports.py @@ -0,0 +1,356 @@ +#!/usr/bin/env python +""" +This file is dedicated to visualizing and analyzing the data collected +from Google Custom Search. 
+""" +# Standard library +import argparse +import os +import sys +import traceback +from datetime import datetime, timezone + +# Third-party +import matplotlib.pyplot as plt +import matplotlib.ticker as ticker +import pandas as pd +import seaborn as sns +from pandas import PeriodIndex + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. + """ + LOGGER.info("Parsing command-line arguments") + + # Taken from shared module, fix later + datetime_today = datetime.now(timezone.utc) + quarter = PeriodIndex([datetime_today.date()], freq="Q")[0] + + parser = argparse.ArgumentParser(description="Google Custom Search Report") + parser.add_argument( + "--quarter", + "-q", + type=str, + default=f"{quarter}", + help="Data quarter in format YYYYQx, e.g., 2024Q2", + ) + parser.add_argument( + "--skip-commit", + action="store_true", + help="Don't git commit changes (also skips git push changes)", + ) + parser.add_argument( + "--skip-push", + action="store_true", + help="Don't git push changes", + ) + parser.add_argument( + "--show-plots", + action="store_true", + help="Show generated plots (in addition to saving them)", + ) + args = parser.parse_args() + if args.skip_commit: + args.skip_push = True + return args + + +def load_data(args): + """ + Load the collected data from the CSV file. + """ + selected_quarter = args.quarter + + file_path = os.path.join( + PATHS["data"], f"{selected_quarter}", "1-fetch", "gcs_fetched.csv" + ) + + if not os.path.exists(file_path): + LOGGER.error(f"Data file not found: {file_path}") + return pd.DataFrame() + + data = pd.read_csv(file_path) + LOGGER.info(f"Data loaded from {file_path}") + return data + + +def visualize_by_country(data, args): + """ + Create a bar chart for the number of webpages licensed by country. + """ + LOGGER.info( + "Creating a bar chart for the number of webpages licensed by country." 
+ ) + + selected_quarter = args.quarter + + # Get the list of country columns dynamically + columns = [col.strip() for col in data.columns.tolist()] + + start_index = columns.index("United States") + end_index = columns.index("Japan") + 1 + + countries = columns[start_index:end_index] + + data.columns = data.columns.str.strip() + + LOGGER.info(f"Cleaned Columns: {data.columns.tolist()}") + + # Aggregate the data by summing the counts for each country + country_data = data[countries].sum() + + plt.figure(figsize=(12, 8)) + ax = sns.barplot(x=country_data.index, y=country_data.values) + plt.title( + f"Number of Google Webpages Licensed by Country ({selected_quarter})" + ) + plt.xlabel("Country") + plt.ylabel("Number of Webpages") + plt.xticks(rotation=45) + + # Add value numbers to the top of each bar + for p in ax.patches: + ax.annotate( + format(p.get_height(), ",.0f"), + (p.get_x() + p.get_width() / 2.0, p.get_height()), + ha="center", + va="center", + xytext=(0, 9), + textcoords="offset points", + ) + + # Format the y-axis to display numbers without scientific notation + ax.get_yaxis().get_major_formatter().set_scientific(False) + ax.get_yaxis().set_major_formatter( + plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))) + ) + + output_directory = os.path.join( + PATHS["data"], f"{selected_quarter}", "3-report" + ) + + LOGGER.info(f"Output directory: {output_directory}") + + # Create the directory if it does not exist + os.makedirs(output_directory, exist_ok=True) + image_path = os.path.join(output_directory, "gcs_country_report.png") + plt.savefig(image_path) + + if args.show_plots: + plt.show() + + shared.update_readme( + PATHS, + image_path, + "Google Custom Search", + "Number of Google Webpages Licensed by Country", + "Country Report", + args, + ) + + LOGGER.info("Visualization by country created.") + + +def visualize_by_license_type(data, args): + """ + Create a bar chart for the number of webpages licensed by license type + """ + LOGGER.info( + "Creating a bar chart for the number of " + "webpages licensed by license type." 
+ ) + + selected_quarter = args.quarter + + # Strip any leading/trailing spaces from the columns + data.columns = data.columns.str.strip() + + # Sum the values across all columns except the first one ('LICENSE TYPE') + license_data = data.set_index("LICENSE TYPE").sum(axis=1) + + plt.figure(figsize=(12, 8)) + ax = sns.barplot(x=license_data.index, y=license_data.values) + plt.title( + f"Number of Webpages Licensed by License Type ({selected_quarter})" + ) + plt.xlabel("License Type") + plt.ylabel("Number of Webpages") + plt.xticks(rotation=45, ha="right") + + # Use shorter X axis labels + ax.set_xticklabels( + [ + "CC BY 2.5" if "by/2.5" in label else label + for label in license_data.index + ] + ) + + # Use the millions formatter for y-axis + def millions_formatter(x, pos): + "The two args are the value and tick position" + return f"{x * 1e-6:.1f}M" + + ax.yaxis.set_major_formatter(ticker.FuncFormatter(millions_formatter)) + + plt.tight_layout() + + output_directory = os.path.join( + PATHS["data"], f"{selected_quarter}", "3-report" + ) + + LOGGER.info(f"Output directory: {output_directory}") + + # Create the directory if it does not exist + os.makedirs(output_directory, exist_ok=True) + image_path = os.path.join(output_directory, "gcs_licensetype_report.png") + + plt.savefig(image_path) + + if args.show_plots: + plt.show() + + shared.update_readme( + PATHS, + image_path, + "Google Custom Search", + "Number of Webpages Licensed by License Type", + "License Type Report", + args, + ) + + LOGGER.info("Visualization by license type created.") + + +def visualize_by_language(data, args): + """ + Create a bar chart for the number of webpages licensed by language. + """ + LOGGER.info( + "Creating a bar chart for the number of webpages licensed by language." 
+ ) + + selected_quarter = args.quarter + + # Get the list of country columns dynamically + columns = [col.strip() for col in data.columns.tolist()] + + start_index = columns.index("English") + end_index = columns.index("Indonesian") + 1 + + languages = columns[start_index:end_index] + + data.columns = data.columns.str.strip() + + LOGGER.info(f"Cleaned Columns: {data.columns.tolist()}") + + # Aggregate the data by summing the counts for each country + language_data = data[languages].sum() + + plt.figure(figsize=(12, 8)) + ax = sns.barplot(x=language_data.index, y=language_data.values) + plt.title( + f"Number of Google Webpages Licensed by Language ({selected_quarter})" + ) + plt.xlabel("Language") + plt.ylabel("Number of Webpages") + plt.xticks(rotation=45) + + # Add value numbers to the top of each bar + for p in ax.patches: + ax.annotate( + format(p.get_height(), ",.0f"), + (p.get_x() + p.get_width() / 2.0, p.get_height()), + ha="center", + va="center", + xytext=(0, 9), + textcoords="offset points", + ) + + # Format the y-axis to display numbers without scientific notation + ax.get_yaxis().get_major_formatter().set_scientific(False) + ax.get_yaxis().set_major_formatter( + plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))) + ) + + output_directory = os.path.join( + PATHS["data"], f"{selected_quarter}", "3-report" + ) + + LOGGER.info(f"Output directory: {output_directory}") + + # Create the directory if it does not exist + os.makedirs(output_directory, exist_ok=True) + image_path = os.path.join(output_directory, "gcs_language_report.png") + plt.savefig(image_path) + + if args.show_plots: + plt.show() + + shared.update_readme( + PATHS, + image_path, + "Google Custom Search", + "Number of Google Webpages Licensed by Language", + "Language Report", + args, + ) + + LOGGER.info("Visualization by language created.") + + +def main(): + + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + args = parse_arguments() + + data = load_data(args) + if data.empty: + return + + current_directory = os.getcwd() + LOGGER.info(f"Current working directory: {current_directory}") + + visualize_by_country(data, args) + visualize_by_license_type(data, args) + visualize_by_language(data, args) + + # Add and commit changes + if not args.skip_commit: + shared.add_and_commit(PATHS["repo"], "Added and committed new reports") + + # Push changes + if not args.skip_push: + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/3-report/github_reports.py b/scripts/3-report/github_reports.py new file mode 100644 index 00000000..9d2a51f2 --- /dev/null +++ b/scripts/3-report/github_reports.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python +""" +This file is dedicated to visualizing and analyzing the data collected +from GitHub. 
+""" +# Standard library +import argparse +import os +import sys +import traceback +from datetime import datetime, timezone + +# Third-party +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +from pandas import PeriodIndex + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. + """ + LOGGER.info("Parsing command-line arguments") + # Taken from shared module, fix later + datetime_today = datetime.now(timezone.utc) + quarter = PeriodIndex([datetime_today.date()], freq="Q")[0] + + parser = argparse.ArgumentParser(description="GitHub Data Report") + parser.add_argument( + "--quarter", + "-q", + type=str, + default=f"{quarter}", + help="Data quarter in format YYYYQx, e.g., 2024Q2", + ) + parser.add_argument( + "--skip-commit", + action="store_true", + help="Don't git commit changes (also skips git push changes)", + ) + parser.add_argument( + "--skip-push", + action="store_true", + help="Don't git push changes", + ) + parser.add_argument( + "--show-plots", + action="store_true", + help="Show generated plots (in addition to saving them)", + ) + args = parser.parse_args() + if args.skip_commit: + args.skip_push = True + return args + + +def load_data(args): + """ + Load the collected data from the CSV file. + """ + selected_quarter = args.quarter + + file_path = os.path.join( + PATHS["data"], f"{selected_quarter}", "1-fetch", "github_fetched.csv" + ) + + if not os.path.exists(file_path): + LOGGER.error(f"Data file not found: {file_path}") + return pd.DataFrame() + + data = pd.read_csv(file_path) + LOGGER.info(f"Data loaded from {file_path}") + return data + + +def visualize_by_license_type(data, args): + """ + Create a bar chart for the number of repositories licensed by license type. + """ + LOGGER.info( + "Creating a bar chart for the number " + "of repositories licensed by license type." + ) + + selected_quarter = args.quarter + + # Strip any leading/trailing spaces from the columns + data.columns = data.columns.str.strip() + + plt.figure(figsize=(12, 8)) + try: + ax = sns.barplot(x=data["LICENSE_TYPE"], y=data["Repository Count"]) + except KeyError as e: + LOGGER.error(f"KeyError: {e}. 
Available columns are: {data.columns}") + return + + plt.title("Number of Repositories Licensed by License Type") + plt.xlabel("License Type") + plt.ylabel("Number of Repositories") + plt.xticks(rotation=45, ha="right") + + # Add value numbers to the top of each bar + for p in ax.patches: + ax.annotate( + format(p.get_height(), ",.0f"), + (p.get_x() + p.get_width() / 2.0, p.get_height()), + ha="center", + va="center", + xytext=(0, 9), + textcoords="offset points", + ) + + output_directory = os.path.join( + PATHS["data"], f"{selected_quarter}", "3-report" + ) + + LOGGER.info(f"Output directory: {output_directory}") + + # Create the directory if it does not exist + os.makedirs(output_directory, exist_ok=True) + image_path = os.path.join( + output_directory, "github_license_type_report.png" + ) + + plt.savefig(image_path) + + if args.show_plots: + plt.show() + + shared.update_readme( + PATHS, + image_path, + "GitHub Data", + "Number of Repositories Licensed by License Type", + "GitHub License Type Report", + args, + ) + + LOGGER.info("Visualization by license type created.") + + +def main(): + + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + args = parse_arguments() + + data = load_data(args) + if data.empty: + return + + current_directory = os.getcwd() + LOGGER.info(f"Current working directory: {current_directory}") + + visualize_by_license_type(data, args) + + # Add and commit changes + if not args.skip_commit: + shared.add_and_commit( + PATHS["repo"], "Added and committed new GitHub reports" + ) + + # Push changes + if not args.skip_push: + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/3-report/internetarchive_reports.py b/scripts/3-report/internetarchive_reports.py new file mode 100644 index 00000000..8d98352c --- /dev/null +++ b/scripts/3-report/internetarchive_reports.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python +""" +This file is dedicated to visualizing and analyzing the data collected +from Internet Archive. +""" +# Standard library +import argparse +import os +import sys +import traceback +from datetime import datetime, timezone + +# Third-party +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +from pandas import PeriodIndex + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. 
+ """ + LOGGER.info("Parsing command-line arguments") + + # Taken from shared module, fix later + datetime_today = datetime.now(timezone.utc) + quarter = PeriodIndex([datetime_today.date()], freq="Q")[0] + + parser = argparse.ArgumentParser(description="Internet Archive Reports") + parser.add_argument( + "--quarter", + "-q", + type=str, + required=False, + default=f"{quarter}", + help="Data quarter in format YYYYQx, e.g., 2024Q2", + ) + parser.add_argument( + "--skip-commit", + action="store_true", + help="Don't git commit changes (also skips git push changes)", + ) + parser.add_argument( + "--skip-push", + action="store_true", + help="Don't git push changes", + ) + parser.add_argument( + "--show-plots", + action="store_true", + help="Show generated plots (in addition to saving them)", + ) + args = parser.parse_args() + if args.skip_commit: + args.skip_push = True + return args + + +def load_data(args): + """ + Load the collected data from the CSV file. + """ + selected_quarter = args.quarter + + file_path = os.path.join( + PATHS["data"], + f"{selected_quarter}", + "1-fetch", + "internetarchive_fetched.csv", + ) + + if not os.path.exists(file_path): + LOGGER.error(f"Data file not found: {file_path}") + return pd.DataFrame() + + data = pd.read_csv(file_path) + LOGGER.info(f"Data loaded from {file_path}") + return data + + +def visualize_by_license_type(data, args): + """ + Create a bar chart for the number of repositories licensed by license type. + """ + LOGGER.info( + "Creating a bar chart for the number of documents by license type." + ) + + selected_quarter = args.quarter + + # Strip any leading/trailing spaces from the columns + data.columns = data.columns.str.strip() + + plt.figure(figsize=(12, 8)) + ax = sns.barplot(x=data["LICENSE TYPE"], y=data["Document Count"]) + plt.title("Number of Internet Archive Documents by License Type") + plt.xlabel("License Type") + plt.ylabel("Document Count") + plt.xticks(rotation=45, ha="right") + + # Add value numbers to the top of each bar + for p in ax.patches: + ax.annotate( + format(p.get_height(), ",.0f"), + (p.get_x() + p.get_width() / 2.0, p.get_height()), + ha="center", + va="center", + xytext=(0, 9), + textcoords="offset points", + ) + + output_directory = os.path.join( + PATHS["data"], f"{selected_quarter}", "3-report" + ) + + LOGGER.info(f"Output directory: {output_directory}") + + os.makedirs(output_directory, exist_ok=True) + image_path = os.path.join( + output_directory, "internetarchive_license_report.png" + ) + plt.savefig(image_path) + + if args.show_plots: + plt.show() + + shared.update_readme( + PATHS, + image_path, + "Internet Archive", + "Number of Internet Archive Documents by License Type", + "License Type Report", + args, + ) + LOGGER.info("Visualization by license type created.") + + +def main(): + + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + args = parse_arguments() + + data = load_data(args) + if data.empty: + return + + current_directory = os.getcwd() + LOGGER.info(f"Current working directory: {current_directory}") + + visualize_by_license_type(data, args) + + # Add and commit changes + if not args.skip_commit: + shared.add_and_commit( + PATHS["repo"], "Added and committed new Internet Archive reports" + ) + + # Push changes + if not args.skip_push: + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except 
SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/3-report/metmuseum_reports.py b/scripts/3-report/metmuseum_reports.py new file mode 100644 index 00000000..0c1727ec --- /dev/null +++ b/scripts/3-report/metmuseum_reports.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python +""" +This file is dedicated to visualizing and analyzing the data collected +from Metmuseum. +""" +# Standard library +import argparse +import os +import sys +import traceback +from datetime import datetime, timezone + +# Third-party +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +from pandas import PeriodIndex + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. + """ + LOGGER.info("Parsing command-line arguments") + + # Taken from shared module, fix later + datetime_today = datetime.now(timezone.utc) + quarter = PeriodIndex([datetime_today.date()], freq="Q")[0] + + parser = argparse.ArgumentParser(description="Met Museum Reports") + parser.add_argument( + "--quarter", + "-q", + type=str, + required=False, + default=f"{quarter}", + help="Data quarter in format YYYYQx, e.g., 2024Q2", + ) + parser.add_argument( + "--skip-commit", + action="store_true", + help="Don't git commit changes (also skips git push changes)", + ) + parser.add_argument( + "--skip-push", + action="store_true", + help="Don't git push changes", + ) + parser.add_argument( + "--show-plots", + action="store_true", + help="Show generated plots (in addition to saving them)", + ) + args = parser.parse_args() + if args.skip_commit: + args.skip_push = True + return args + + +def load_data(args): + """ + Load the collected data from the CSV file. + """ + selected_quarter = args.quarter + + file_path = os.path.join( + PATHS["data"], + f"{selected_quarter}", + "1-fetch", + "metmuseum_fetched.csv", + ) + + if not os.path.exists(file_path): + LOGGER.error(f"Data file not found: {file_path}") + return pd.DataFrame() + + data = pd.read_csv(file_path) + LOGGER.info(f"Data loaded from {file_path}") + return data + + +def visualize_by_license_type(data, args): + """ + Create a bar chart for the number of documents by license type. + """ + LOGGER.info( + "Creating a bar chart for the number of documents by license type."
+ ) + + selected_quarter = args.quarter + + # Strip any leading/trailing spaces from the columns + data.columns = data.columns.str.strip() + + plt.figure(figsize=(12, 8)) + ax = sns.barplot(x=data["LICENSE TYPE"], y=data["Document Count"]) + plt.title("Number of MetMuseum Documents by License Type") + plt.xlabel("License Type") + plt.ylabel("Document Count") + plt.xticks(rotation=45, ha="right") + + # Add value numbers to the top of each bar + for p in ax.patches: + ax.annotate( + format(p.get_height(), ",.0f"), + (p.get_x() + p.get_width() / 2.0, p.get_height()), + ha="center", + va="center", + xytext=(0, 9), + textcoords="offset points", + ) + + output_directory = os.path.join( + PATHS["data"], f"{selected_quarter}", "3-report" + ) + + LOGGER.info(f"Output directory: {output_directory}") + + os.makedirs(output_directory, exist_ok=True) + image_path = os.path.join(output_directory, "metmuseum_license_report.png") + plt.savefig(image_path) + + if args.show_plots: + plt.show() + + shared.update_readme( + PATHS, + image_path, + "MetMuseum", + "Number of MetMuseum Documents by License Type", + "License Type Report", + args, + ) + LOGGER.info("Visualization by license type created.") + + +def main(): + + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + args = parse_arguments() + + data = load_data(args) + if data.empty: + return + + current_directory = os.getcwd() + LOGGER.info(f"Current working directory: {current_directory}") + + visualize_by_license_type(data, args) + + # Add and commit changes + if not args.skip_commit: + shared.add_and_commit( + PATHS["repo"], "Added and committed new Metmuseum reports" + ) + + # Push changes + if not args.skip_push: + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/3-report/vimeo_reports.py b/scripts/3-report/vimeo_reports.py new file mode 100644 index 00000000..3ca100bd --- /dev/null +++ b/scripts/3-report/vimeo_reports.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python +""" +This file is dedicated to visualizing and analyzing the data collected +from Vimeo. +""" +# Standard library +import argparse +import os +import sys +import traceback +from datetime import datetime, timezone + +# Third-party +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +from pandas import PeriodIndex + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. 
+ """ + LOGGER.info("Parsing command-line arguments") + # Taken from shared module, fix later + datetime_today = datetime.now(timezone.utc) + quarter = PeriodIndex([datetime_today.date()], freq="Q")[0] + + parser = argparse.ArgumentParser(description="Vimeo Data Report") + parser.add_argument( + "--quarter", + "-q", + type=str, + default=f"{quarter}", + help="Data quarter in format YYYYQx, e.g., 2024Q2", + ) + parser.add_argument( + "--skip-commit", + action="store_true", + help="Don't git commit changes (also skips git push changes)", + ) + parser.add_argument( + "--skip-push", + action="store_true", + help="Don't git push changes", + ) + parser.add_argument( + "--show-plots", + action="store_true", + help="Show generated plots (in addition to saving them)", + ) + args = parser.parse_args() + if args.skip_commit: + args.skip_push = True + return args + + +def load_data(args): + """ + Load the collected data from the CSV file. + """ + selected_quarter = args.quarter + + file_path = os.path.join( + PATHS["data"], f"{selected_quarter}", "1-fetch", "vimeo_fetched.csv" + ) + + if not os.path.exists(file_path): + LOGGER.error(f"Data file not found: {file_path}") + return pd.DataFrame() + + data = pd.read_csv(file_path) + LOGGER.info(f"Data loaded from {file_path}") + return data + + +def visualize_by_license_type(data, args): + """ + Create a bar chart for the number of documents by license type. + """ + LOGGER.info( + "Creating a bar chart for the number of documents by license type." + ) + + selected_quarter = args.quarter + + # Strip any leading/trailing spaces from the columns + data.columns = data.columns.str.strip() + + plt.figure(figsize=(12, 8)) + ax = sns.barplot(x=data["LICENSE TYPE"], y=data["Document Count"]) + plt.title("Number of Vimeo Documents by License Type") + plt.xlabel("License Type") + plt.ylabel("Document Count") + plt.xticks(rotation=45, ha="right") + + # Add value numbers to the top of each bar + for p in ax.patches: + ax.annotate( + format(p.get_height(), ",.0f"), + (p.get_x() + p.get_width() / 2.0, p.get_height()), + ha="center", + va="center", + xytext=(0, 9), + textcoords="offset points", + ) + + output_directory = os.path.join( + PATHS["data"], f"{selected_quarter}", "3-report" + ) + + LOGGER.info(f"Output directory: {output_directory}") + + os.makedirs(output_directory, exist_ok=True) + image_path = os.path.join(output_directory, "vimeo_license_report.png") + plt.savefig(image_path) + + if args.show_plots: + plt.show() + + shared.update_readme( + PATHS, + image_path, + "Vimeo", + "Number of Vimeo Documents by License Type", + "License Type Report", + args, + ) + LOGGER.info("Visualization by license type created.") + + +def main(): + + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + args = parse_arguments() + + data = load_data(args) + if data.empty: + return + + current_directory = os.getcwd() + LOGGER.info(f"Current working directory: {current_directory}") + + visualize_by_license_type(data, args) + + # Add and commit changes + if not args.skip_commit: + shared.add_and_commit( + PATHS["repo"], "Added and committed new Vimeo reports" + ) + + # Push changes + if not args.skip_push: + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + 
LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/3-report/wikicommons_reports.py b/scripts/3-report/wikicommons_reports.py new file mode 100644 index 00000000..3b8f05cc --- /dev/null +++ b/scripts/3-report/wikicommons_reports.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python +""" +This file is dedicated to visualizing and analyzing the data collected +from Wikicommons. +""" +# Standard library +import argparse +import os +import sys +import traceback +from datetime import datetime, timezone + +# Third-party +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +from pandas import PeriodIndex + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. + """ + LOGGER.info("Parsing command-line arguments") + # Taken from shared module, fix later + datetime_today = datetime.now(timezone.utc) + quarter = PeriodIndex([datetime_today.date()], freq="Q")[0] + + parser = argparse.ArgumentParser(description="Wikicommons Data Report") + parser.add_argument( + "--quarter", + "-q", + type=str, + default=f"{quarter}", + help="Data quarter in format YYYYQx, e.g., 2024Q2", + ) + parser.add_argument( + "--skip-commit", + action="store_true", + help="Don't git commit changes (also skips git push changes)", + ) + parser.add_argument( + "--skip-push", + action="store_true", + help="Don't git push changes", + ) + parser.add_argument( + "--show-plots", + action="store_true", + help="Show generated plots (in addition to saving them)", + ) + args = parser.parse_args() + if args.skip_commit: + args.skip_push = True + return args + + +def load_data(args): + """ + Load the collected data from the CSV file. + """ + selected_quarter = args.quarter + + file_path = os.path.join( + PATHS["data"], + f"{selected_quarter}", + "1-fetch", + "wikicommons_fetched.csv", + ) + + if not os.path.exists(file_path): + LOGGER.error(f"Data file not found: {file_path}") + return pd.DataFrame() + + data = pd.read_csv(file_path) + LOGGER.info(f"Data loaded from {file_path}") + return data + + +def visualize_by_file_count(data, args): + """ + Create a bar chart for the number of files by license type. + """ + LOGGER.info( + "Creating a bar chart for the number of files by license type." 
+ ) + + selected_quarter = args.quarter + + # Strip any leading/trailing spaces from the columns + data.columns = data.columns.str.strip() + + plt.figure(figsize=(12, 8)) + ax = sns.barplot(x=data["LICENSE TYPE"], y=data["File Count"]) + plt.title("Number of WikiCommons Files by License Type") + plt.xlabel("License Type") + plt.ylabel("File Count") + plt.xticks(rotation=45, ha="right") + + # Add value numbers to the top of each bar + for p in ax.patches: + ax.annotate( + format(p.get_height(), ",.0f"), + (p.get_x() + p.get_width() / 2.0, p.get_height()), + ha="center", + va="center", + xytext=(0, 9), + textcoords="offset points", + ) + + output_directory = os.path.join( + PATHS["data"], f"{selected_quarter}", "3-report" + ) + + LOGGER.info(f"Output directory: {output_directory}") + + os.makedirs(output_directory, exist_ok=True) + image_path = os.path.join(output_directory, "wikicommons_file_report.png") + plt.savefig(image_path) + + if args.show_plots: + plt.show() + + shared.update_readme( + PATHS, + image_path, + "WikiCommons", + "Number of WikiCommons Files by License Type", + "File Count Report", + args, + ) + LOGGER.info("Visualization by file count created.") + + +def visualize_by_page_count(data, args): + """ + Create a bar chart for the number of pages by license type. + """ + LOGGER.info( + "Creating a bar chart for the number of pages by license type." + ) + + selected_quarter = args.quarter + + # Strip any leading/trailing spaces from the columns + data.columns = data.columns.str.strip() + + plt.figure(figsize=(12, 8)) + ax = sns.barplot(x=data["LICENSE TYPE"], y=data["Page Count"]) + plt.title("Number of WikiCommons Pages by License Type") + plt.xlabel("License Type") + plt.ylabel("Page Count") + plt.xticks(rotation=45, ha="right") + + # Add value numbers to the top of each bar + for p in ax.patches: + ax.annotate( + format(p.get_height(), ",.0f"), + (p.get_x() + p.get_width() / 2.0, p.get_height()), + ha="center", + va="center", + xytext=(0, 9), + textcoords="offset points", + ) + + output_directory = os.path.join( + PATHS["data"], f"{selected_quarter}", "3-report" + ) + + LOGGER.info(f"Output directory: {output_directory}") + + os.makedirs(output_directory, exist_ok=True) + image_path = os.path.join(output_directory, "wikicommons_page_report.png") + plt.savefig(image_path) + + if args.show_plots: + plt.show() + + shared.update_readme( + PATHS, + image_path, + "WikiCommons", + "Number of WikiCommons Pages by License Type", + "Page Count Report", + args, + ) + LOGGER.info("Visualization by page count created.") + + +def main(): + + # Fetch and merge changes + shared.fetch_and_merge(PATHS["repo"]) + + args = parse_arguments() + + data = load_data(args) + if data.empty: + return + + current_directory = os.getcwd() + LOGGER.info(f"Current working directory: {current_directory}") + + visualize_by_file_count(data, args) + visualize_by_page_count(data, args) + + # Add and commit changes + if not args.skip_commit: + shared.add_and_commit( + PATHS["repo"], "Added and committed new WikiCommons reports" + ) + + # Push changes + if not args.skip_push: + shared.push_changes(PATHS["repo"]) + + +if __name__ == "__main__": + try: + main() + except shared.QuantifyingException as e: + if e.exit_code == 0: + LOGGER.info(e.message) + else: + LOGGER.error(e.message) + sys.exit(e.exit_code) + except SystemExit as e: + LOGGER.error(f"System exit with code: {e.code}") + sys.exit(e.code) + except KeyboardInterrupt: + LOGGER.info("(130) Halted via KeyboardInterrupt.") + sys.exit(130) + except 
Exception: + LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}") + sys.exit(1) diff --git a/scripts/3-report/wikipedia_reports.py b/scripts/3-report/wikipedia_reports.py new file mode 100644 index 00000000..3a6008d2 --- /dev/null +++ b/scripts/3-report/wikipedia_reports.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python +""" +This file is dedicated to visualizing and analyzing the data collected +from Wikipedia. +""" +# Standard library +import argparse +import os +import sys +import traceback +from datetime import datetime, timezone + +# Third-party +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +from pandas import PeriodIndex + +# Add parent directory so shared can be imported +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +# First-party/Local +import shared # noqa: E402 + +# Setup +LOGGER, PATHS = shared.setup(__file__) + + +def parse_arguments(): + """ + Parses command-line arguments, returns parsed arguments. + """ + LOGGER.info("Parsing command-line arguments") + # Taken from shared module, fix later + datetime_today = datetime.now(timezone.utc) + quarter = PeriodIndex([datetime_today.date()], freq="Q")[0] + + parser = argparse.ArgumentParser(description="Wikipedia Data Report") + parser.add_argument( + "--quarter", + "-q", + type=str, + default=f"{quarter}", + help="Data quarter in format YYYYQx, e.g., 2024Q2", + ) + parser.add_argument( + "--skip-commit", + action="store_true", + help="Don't git commit changes (also skips git push changes)", + ) + parser.add_argument( + "--skip-push", + action="store_true", + help="Don't git push changes", + ) + parser.add_argument( + "--show-plots", + action="store_true", + help="Show generated plots (in addition to saving them)", + ) + args = parser.parse_args() + if args.skip_commit: + args.skip_push = True + return args + + +def load_data(args): + """ + Load the collected data from the CSV file. + """ + selected_quarter = args.quarter + + file_path = os.path.join( + PATHS["data"], + f"{selected_quarter}", + "1-fetch", + "wikipedia_fetched.csv", + ) + + if not os.path.exists(file_path): + LOGGER.error(f"Data file not found: {file_path}") + return pd.DataFrame() + + data = pd.read_csv(file_path) + LOGGER.info(f"Data loaded from {file_path}") + return data + + +def visualize_by_language(data, args): + """ + Create a bar chart for various statistics by language. 
+    """
+    LOGGER.info("Creating bar charts for various statistics by language.")
+
+    selected_quarter = args.quarter
+
+    # Strip any leading/trailing spaces from the columns
+    data.columns = data.columns.str.strip()
+
+    columns_to_plot = ["pages", "articles", "edits", "images", "users"]
+    for column in columns_to_plot:
+        plt.figure(figsize=(12, 8))
+        ax = sns.barplot(x="language", y=column, data=data)
+        plt.title(f"Wikipedia {column.capitalize()} by Language")
+        plt.xlabel("Language")
+        plt.ylabel(column.capitalize())
+        plt.xticks(rotation=45, ha="right")
+
+        # Add value numbers to the top of each bar
+        for p in ax.patches:
+            ax.annotate(
+                format(p.get_height(), ",.0f"),
+                (p.get_x() + p.get_width() / 2.0, p.get_height()),
+                ha="center",
+                va="center",
+                xytext=(0, 9),
+                textcoords="offset points",
+            )
+
+        output_directory = os.path.join(
+            PATHS["data"], f"{selected_quarter}", "3-report"
+        )
+
+        LOGGER.info(f"Output directory: {output_directory}")
+        os.makedirs(output_directory, exist_ok=True)
+        image_path = os.path.join(
+            output_directory, f"wikipedia_{column}_report.png"
+        )
+        plt.savefig(image_path)
+
+        if args.show_plots:
+            plt.show()
+
+        shared.update_readme(
+            PATHS,
+            image_path,
+            "Wikipedia",
+            f"Wikipedia {column.capitalize()} by Language",
+            f"{column.capitalize()} Report",
+            args,
+        )
+        LOGGER.info(f"Visualization by {column} created.")
+
+
+def main():
+
+    # Fetch and merge changes
+    shared.fetch_and_merge(PATHS["repo"])
+
+    args = parse_arguments()
+
+    data = load_data(args)
+    if data.empty:
+        return
+
+    current_directory = os.getcwd()
+    LOGGER.info(f"Current working directory: {current_directory}")
+
+    visualize_by_language(data, args)
+
+    # Add and commit changes
+    if not args.skip_commit:
+        shared.add_and_commit(
+            PATHS["repo"], "Added and committed new Wikipedia reports"
+        )
+
+    # Push changes
+    if not args.skip_push:
+        shared.push_changes(PATHS["repo"])
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except shared.QuantifyingException as e:
+        if e.exit_code == 0:
+            LOGGER.info(e.message)
+        else:
+            LOGGER.error(e.message)
+        sys.exit(e.exit_code)
+    except SystemExit as e:
+        LOGGER.error(f"System exit with code: {e.code}")
+        sys.exit(e.code)
+    except KeyboardInterrupt:
+        LOGGER.info("(130) Halted via KeyboardInterrupt.")
+        sys.exit(130)
+    except Exception:
+        LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}")
+        sys.exit(1)
diff --git a/scripts/3-report/youtube_reports.py b/scripts/3-report/youtube_reports.py
new file mode 100644
index 00000000..2363a22c
--- /dev/null
+++ b/scripts/3-report/youtube_reports.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python
+"""
+This file is dedicated to visualizing and analyzing the data collected
+from YouTube.
+"""
+# Standard library
+import argparse
+import os
+import sys
+import traceback
+from datetime import datetime, timezone
+
+# Third-party
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+from pandas import PeriodIndex
+
+# Add parent directory so shared can be imported
+sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
+
+# First-party/Local
+import shared  # noqa: E402
+
+# Setup
+LOGGER, PATHS = shared.setup(__file__)
+
+
+def parse_arguments():
+    """
+    Parses command-line arguments, returns parsed arguments.
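+
+    Supported options: --quarter, --skip-commit, --skip-push, and
+    --show-plots; --skip-commit also implies --skip-push.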
+    """
+    LOGGER.info("Parsing command-line arguments")
+    # Taken from shared module, fix later
+    datetime_today = datetime.now(timezone.utc)
+    quarter = PeriodIndex([datetime_today.date()], freq="Q")[0]
+
+    parser = argparse.ArgumentParser(description="YouTube Data Report")
+    parser.add_argument(
+        "--quarter",
+        "-q",
+        type=str,
+        default=f"{quarter}",
+        help="Data quarter in format YYYYQx, e.g., 2024Q2",
+    )
+    parser.add_argument(
+        "--skip-commit",
+        action="store_true",
+        help="Don't git commit changes (also skips git push changes)",
+    )
+    parser.add_argument(
+        "--skip-push",
+        action="store_true",
+        help="Don't git push changes",
+    )
+    parser.add_argument(
+        "--show-plots",
+        action="store_true",
+        help="Show generated plots (in addition to saving them)",
+    )
+    args = parser.parse_args()
+    if args.skip_commit:
+        args.skip_push = True
+    return args
+
+
+def load_data(args):
+    """
+    Load the collected data from the CSV file.
+    """
+    selected_quarter = args.quarter
+
+    file_path = os.path.join(
+        PATHS["data"], f"{selected_quarter}", "1-fetch", "youtube_fetched.csv"
+    )
+
+    if not os.path.exists(file_path):
+        LOGGER.error(f"Data file not found: {file_path}")
+        return pd.DataFrame()
+
+    data = pd.read_csv(file_path)
+    LOGGER.info(f"Data loaded from {file_path}")
+    return data
+
+
+def visualize_by_license_type_over_time(data, args):
+    """
+    Create a line chart for document count over time by license type.
+    """
+    LOGGER.info(
+        "Creating a line chart for document count over time by license type."
+    )
+
+    selected_quarter = args.quarter
+
+    # Strip any leading/trailing spaces from the columns
+    data.columns = data.columns.str.strip()
+
+    plt.figure(figsize=(12, 8))
+    sns.lineplot(
+        x="Time", y="Document Count", hue="LICENSE TYPE", data=data
+    )
+    plt.title("YouTube Document Count Over Time by License Type")
+    plt.xlabel("Time")
+    plt.ylabel("Document Count")
+    plt.xticks(rotation=45, ha="right")
+
+    # NOTE: unlike the bar chart reports, a line chart produces no bar
+    # patches to iterate over, so no per-bar value labels are added here.
+
+    output_directory = os.path.join(
+        PATHS["data"], f"{selected_quarter}", "3-report"
+    )
+
+    LOGGER.info(f"Output directory: {output_directory}")
+
+    os.makedirs(output_directory, exist_ok=True)
+    image_path = os.path.join(
+        output_directory, "youtube_license_over_time_report.png"
+    )
+    plt.savefig(image_path)
+
+    if args.show_plots:
+        plt.show()
+
+    shared.update_readme(
+        PATHS,
+        image_path,
+        "YouTube",
+        "YouTube Document Count Over Time by License Type",
+        "License Over Time Report",
+        args,
+    )
+    LOGGER.info("Visualization by license type over time created.")
+
+
+def main():
+
+    # Fetch and merge changes
+    shared.fetch_and_merge(PATHS["repo"])
+
+    args = parse_arguments()
+
+    data = load_data(args)
+    if data.empty:
+        return
+
+    current_directory = os.getcwd()
+    LOGGER.info(f"Current working directory: {current_directory}")
+
+    visualize_by_license_type_over_time(data, args)
+
+    # Add and commit changes
+    if not args.skip_commit:
+        shared.add_and_commit(
+            PATHS["repo"], "Added and committed new YouTube reports"
+        )
+
+    # Push changes
+    if not args.skip_push:
+        shared.push_changes(PATHS["repo"])
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except shared.QuantifyingException as e:
+        if e.exit_code == 0:
+            LOGGER.info(e.message)
+        else:
+            LOGGER.error(e.message)
+        sys.exit(e.exit_code)
+    except SystemExit as e:
+        # argparse and explicit sys.exit() raise SystemExit; log the code
+        # and exit with the same status
+        LOGGER.error(f"System exit with code: {e.code}")
+        sys.exit(e.code)
+    except KeyboardInterrupt:
+        LOGGER.info("(130) Halted via KeyboardInterrupt.")
+        sys.exit(130)
+    except Exception:
+        LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}")
+        sys.exit(1)
diff --git a/scripts/shared.py b/scripts/shared.py
new file mode 100644
index 00000000..8e4b29e1
--- /dev/null
+++ b/scripts/shared.py
@@ -0,0 +1,246 @@
+# Standard library
+# import argparse
+import logging
+import os
+from datetime import datetime, timezone
+
+# Third-party
+from git import InvalidGitRepositoryError, NoSuchPathError, Repo
+from pandas import PeriodIndex
+
+
+class QuantifyingException(Exception):
+    def __init__(self, message, exit_code=None):
+        self.exit_code = exit_code if exit_code is not None else 1
+        self.message = message
+        super().__init__(self.message)
+
+
+def setup(current_file):
+    # Set up logging
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s - %(levelname)s - %(module)s - %(message)s",
+    )
+    logger = logging.getLogger(__name__)
+
+    # Datetime
+    datetime_today = datetime.now(timezone.utc)
+    quarter = PeriodIndex([datetime_today.date()], freq="Q")[0]
+
+    # Paths
+    paths = {}
+    paths["repo"] = os.path.dirname(
+        os.path.abspath(os.path.realpath(os.path.join(__file__, "..")))
+    )
+    paths["dotenv"] = os.path.join(paths["repo"], ".env")
+    phase = os.path.basename(
+        os.path.dirname(os.path.abspath(os.path.realpath(current_file)))
+    )
+    paths["data"] = os.path.join(paths["repo"], "data")
+    data_quarter = os.path.join(paths["data"], f"{quarter}")
+    paths["state"] = os.path.join(data_quarter, "state.yaml")
+    paths["data_phase"] = os.path.join(data_quarter, phase)
+
+    paths["data_quarter"] = data_quarter
+
+    return logger, paths
+
+
+def log_paths(logger, paths):
+    paths_list = []
+    for label, path in paths.items():
+        label = f"{label}:"
+        paths_list.append(f"\n{' ' * 12}{label:<11} {path}")
+    paths_list = "".join(paths_list)
+    logger.info(f"PATHS:{paths_list}")
+
+
+def fetch_and_merge(repo_path, branch="main"):
+    try:
+        repo = Repo(repo_path)
+        origin = repo.remote(name="origin")
+        origin.fetch()
+
+        # Determine the branch to use
+        if branch is None:
+            # Use the current branch if no branch is provided
+            branch = repo.active_branch.name if repo.active_branch else "main"
+
+        # Ensure that the branch exists in the remote
+        if f"origin/{branch}" not in [ref.name for ref in repo.refs]:
+            raise ValueError(
+                f"Branch '{branch}' does not exist in remote 'origin'"
+            )
+
+        repo.git.merge(f"origin/{branch}", allow_unrelated_histories=True)
+        logging.info(f"Fetched and merged latest changes from {branch}")
+    except InvalidGitRepositoryError:
+        raise QuantifyingException(f"Invalid Git repository at {repo_path}", 2)
+    except NoSuchPathError:
+        raise QuantifyingException(f"No such path: {repo_path}", 3)
+    except Exception as e:
+        raise QuantifyingException(f"Error during fetch and merge: {e}", 1)
+
+
+def add_and_commit(repo_path, message):
+    try:
+        repo = Repo(repo_path)
+        if not repo.is_dirty(untracked_files=True):
+            logging.info("No changes to commit")
+            return
+        repo.git.add(update=True)
+        repo.index.commit(message)
+        logging.info("Changes committed")
+    except InvalidGitRepositoryError:
+        raise QuantifyingException(f"Invalid Git repository at {repo_path}", 2)
+    except NoSuchPathError:
+        raise QuantifyingException(f"No such path: {repo_path}", 3)
+    except Exception as e:
+        raise QuantifyingException(f"Error during add and commit: {e}", 1)
+
+
+def push_changes(repo_path):
+    try:
+        repo = Repo(repo_path)
+        origin = repo.remote(name="origin")
+        origin.push()
+        logging.info("Changes pushed")
+    except InvalidGitRepositoryError:
+        raise QuantifyingException(f"Invalid Git repository at {repo_path}", 2)
+    except NoSuchPathError:
+        raise QuantifyingException(f"No such path: {repo_path}", 3)
+    except Exception as e:
+        raise QuantifyingException(f"Error during push changes: {e}", 1)
+
+
+def update_readme(
+    paths, image_path, data_source, description, section_title, args
+):
+    """
+    Update the README.md file with the generated images and descriptions.
+    """
+    readme_path = os.path.join(paths["data"], args.quarter, "README.md")
+
+    # Define section markers for each data source
+    section_marker_start = f"<!-- {data_source} Start -->"
+    section_marker_end = f"<!-- {data_source} End -->"
+
+    # Define specific section markers for each report type
+    specific_section_start = f"<!-- {section_title} Start -->"
+    specific_section_end = f"<!-- {section_title} End -->"
+
+    # Convert image path to a relative path
+    rel_image_path = os.path.relpath(image_path, os.path.dirname(readme_path))
+
+    if os.path.exists(readme_path):
+        with open(readme_path, "r") as f:
+            lines = f.readlines()
+    else:
+        lines = []
+
+    # Ensure the title is at the top
+    title_line = f"# {args.quarter} Quantifying the Commons\n"
+
+    if not lines or lines[0].strip() != title_line.strip():
+        # Add title if not present or incorrect
+        lines = [title_line] + lines
+
+    # Locate or create the data source section
+    section_start = section_end = None
+    for i, line in enumerate(lines):
+        if section_marker_start in line:
+            section_start = i
+        if section_marker_end in line:
+            section_end = i
+
+    if section_start is None or section_end is None:
+        # If the data source section is not present, add it
+        lines.extend(
+            [
+                f"## Data Source: {data_source}\n",
+                f"{section_marker_start}\n",
+                f"{section_marker_end}\n",
+            ]
+        )
+        section_start = len(lines) - 2
+        section_end = len(lines) - 1
+
+    # Locate or create the specific section within the data source section
+    specific_start = specific_end = None
+    for i in range(section_start, section_end):
+        if specific_section_start in lines[i]:
+            specific_start = i
+        if specific_section_end in lines[i]:
+            specific_end = i
+
+    # Prepare the new content for this specific section
+    new_content = [
+        f"{specific_section_start}\n",
+        f"### {section_title}\n",
+        f"![{description}]({rel_image_path})\n",
+        f"{description}\n",
+        f"{specific_section_end}\n",
+    ]
+
+    # Replace or add the specific section content
+    if specific_start is not None and specific_end is not None:
+        # Replace the content between the specific markers
+        lines = (
+            lines[:specific_start]
+            + new_content
+            + lines[specific_end + 1 :]  # noqa: E203
+        )
+    else:
+        # Add new specific section before the end of the data source section
+        lines = lines[:section_end] + new_content + lines[section_end:]
+
+    # Write back to the README.md file
+    with open(readme_path, "w") as f:
+        f.writelines(lines)
+
+    logging.info(
+        f"Updated {readme_path} with new image and "
+        f"description for {section_title}."
+ ) + + +# def main(): +# parser = argparse.ArgumentParser(description="Git operations script") +# parser.add_argument( +# "--operation", +# type=str, +# required=True, +# help="Operation to perform: fetch_and_merge, add_and_commit, push", +# ) +# parser.add_argument("--message", type=str, help="Commit message") +# parser.add_argument( +# "--branch", +# type=str, +# default="refine-automation", +# help="Branch to fetch and merge from", +# ) +# args = parser.parse_args() + +# repo_path = os.getcwd() # Assuming the script runs in repo root + +# if args.operation == "fetch_and_merge": +# fetch_and_merge(repo_path, args.branch) +# elif args.operation == "add_and_commit": +# if not args.message: +# raise ValueError( +# "Commit message is required for add_and_commit operation" +# ) +# add_and_commit(repo_path, args.message) +# elif args.operation == "push": +# push_changes(repo_path) +# else: +# raise ValueError("Unsupported operation") + + +# if __name__ == "__main__": +# main() diff --git a/sources.md b/sources.md index 7ee8c895..596f0bbf 100644 --- a/sources.md +++ b/sources.md @@ -50,7 +50,7 @@ The Flickr Developer Guide](https://www.flickr.com/services/developer/)) - Data available through JSON format -## Google Custom Search JSON API +## GCS (Google Custom Search) JSON API **Description:** The Custom Search JSON API allows user-defined detailed query and access towards related query data using a programmable search engine. @@ -58,11 +58,20 @@ and access towards related query data using a programmable search engine. **API documentation links:** - [Custom Search JSON API Reference | Programmable Search Engine | Google Developers][google-json] +- [Google API Python Client Library][google-api-python] + - [Google API Client Library for Python Docs | + google-api-python-client][google-api-python] + - _Reference documentation for the core library + [googleapiclient][googleapiclient]._ + - See: googleapiclient.discovery > build + - _[Library reference documentation by API][gcs-library-ref]_ + - See Custom Search v1 [cse()][gcs-cse] - [Method: cse.list | Custom Search JSON API | Google Developers][cse-list] +- [XML API reference appendices][reference-appendix] **API information:** - API key required -- Query limit: 100 queries per day for free version +- Query limit: 100 queries per day - Data available through JSON format **Notes:** @@ -72,7 +81,12 @@ and access towards related query data using a programmable search engine. data's first column is sorted due to intermediate data analysis progress. [google-json]: https://developers.google.com/custom-search/v1/reference/rest +[google-api-python]: https://github.com/googleapis/google-api-python-client +[googleapiclient]: http://googleapis.github.io/google-api-python-client/docs/epy/index.html +[gcs-library-ref]: https://googleapis.github.io/google-api-python-client/docs/dyn/ +[gcs-cse]: https://googleapis.github.io/google-api-python-client/docs/dyn/customsearch_v1.cse.html [cse-list]: https://developers.google.com/custom-search/v1/reference/rest/v1/cse/list +[reference-appendix]: https://developers.google.com/custom-search/docs/xml_results_appendices ## Internet Archive Python Interface