Skip to content

Commit

Permalink
Further dataset updates for the new URLs.
Browse files: browse the repository at this point in the history.
  • Loading branch information
simonprickett authored and amotl committed Jan 10, 2025
1 parent 5e8e24d commit 0447079
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 14 deletions.
2 changes: 1 addition & 1 deletion topic/machine-learning/automl/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def churn_dataset(cratedb):
Provide test case with a provisioned dataset.
"""
cratedb.import_csv_pandas(
- filepath="https://github.com/crate/cratedb-datasets/raw/main/machine-learning/automl/churn-dataset.csv",
+ filepath="https://cdn.crate.io/downloads/datasets/cratedb-datasets/machine-learning/automl/churn-dataset.csv",
tablename="pycaret_churn",
)
cratedb.run_sql("REFRESH TABLE pycaret_churn;")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@
"metadata": {},
"outputs": [],
"source": [
- "loader = PyPDFLoader(\"https://github.com/crate/cratedb-datasets/raw/main/machine-learning/fulltext/White%20paper%20-%20Time-series%20data%20in%20manufacturing.pdf\")\n",
+ "loader = PyPDFLoader(\"https://cdn.crate.io/downloads/datasets/cratedb-datasets/machine-learning/fulltext/White%20paper%20-%20Time-series%20data%20in%20manufacturing.pdf\")\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
"pages = loader.load_and_split(text_splitter)"
]
Expand Down
24 changes: 12 additions & 12 deletions topic/machine-learning/mlops-mlflow/tracking_merlion.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
"\n",
"\n",
"# Download the data from the Numenta Anomaly Benchmark\n",
- "data = pd.read_csv(\"https://github.com/crate/cratedb-datasets/raw/main/timeseries/nab-machine-failure.csv\")\n",
+ "data = pd.read_csv(\"https://cdn.crate.io/downloads/datasets/cratedb-datasets/timeseries/nab-machine-failure.csv\")\n",
"\n",
"# Connect to a self-managed CrateDB instance.\n",
"CRATEDB_HTTP_URL_DEFAULT = \"http://crate@localhost:4200/\"\n",
Expand All @@ -47,16 +47,22 @@
},
{
"cell_type": "markdown",
- "source": [
- "2. Import data into CrateDB"
- ],
"metadata": {
"collapsed": false
- }
+ },
+ "source": [
+ "2. Import data into CrateDB"
+ ]
},
{
"cell_type": "code",
"execution_count": null,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "is_executing": true
+ }
+ },
"outputs": [],
"source": [
"# Split the data into chunks of 1000 rows each for better insert performance\n",
Expand All @@ -72,13 +78,7 @@
" for chunk in chunks:\n",
" cursor.executemany(\"INSERT INTO machine_data (timestamp, temperature) VALUES (?, ?);\", list(chunk.itertuples(index=False, name=None)))\n",
" cursor.execute(\"REFRESH TABLE machine_data;\")"
- ],
- "metadata": {
- "collapsed": false,
- "pycharm": {
- "is_executing": true
- }
- }
+ ]
},
{
"cell_type": "markdown",
Expand Down

0 comments on commit 0447079

Please sign in to comment.