-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement REPL mode in Spark and enhance error handling (#99)
- **Read (R)**: Source queries from the OpenSearch flint-query-submission index. - **Execute (E)**: Run queries within the SparkContext environment. - **Publish (P)**: - Push results to the flint-query-result index. - Update query state in the flint-query-submission index. - **Loop (L)**: Continue process until a set exit condition is reached. Additional improvements: - Enable cancelation of running statements in Spark. - Fail statements that wait too long. - Provide detailed error feedback. - Introduce query run time metric. Testing: - Introduced unit tests. - Conducted manual tests. Signed-off-by: Kaituo Li <kaituo@amazon.com>
- Loading branch information
Showing
19 changed files
with
1,614 additions
and
257 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
76 changes: 76 additions & 0 deletions
76
flint-core/src/main/scala/org/opensearch/flint/core/storage/OpenSearchUpdater.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
package org.opensearch.flint.core.storage; | ||
|
||
import org.opensearch.action.support.WriteRequest; | ||
import org.opensearch.action.update.UpdateRequest; | ||
import org.opensearch.client.RequestOptions; | ||
import org.opensearch.client.RestHighLevelClient; | ||
import org.opensearch.common.xcontent.XContentType; | ||
import org.opensearch.flint.core.FlintClient; | ||
import org.opensearch.flint.core.FlintClientBuilder; | ||
import org.opensearch.flint.core.FlintOptions; | ||
|
||
import java.io.IOException; | ||
|
||
public class OpenSearchUpdater { | ||
private final String indexName; | ||
|
||
private final FlintClient flintClient; | ||
|
||
|
||
public OpenSearchUpdater(String indexName, FlintClient flintClient) { | ||
this.indexName = indexName; | ||
this.flintClient = flintClient; | ||
} | ||
|
||
public void upsert(String id, String doc) { | ||
// we might need to keep the updater for a long time. Reusing the client may not work as the temporary | ||
// credentials may expire. | ||
// also, failure to close the client causes the job to be stuck in the running state as the client resource | ||
// is not released. | ||
try (RestHighLevelClient client = flintClient.createClient()) { | ||
UpdateRequest | ||
updateRequest = | ||
new UpdateRequest(indexName, id).doc(doc, XContentType.JSON) | ||
.setRefreshPolicy(WriteRequest.RefreshPolicy.WAIT_UNTIL) | ||
.docAsUpsert(true); | ||
client.update(updateRequest, RequestOptions.DEFAULT); | ||
} catch (IOException e) { | ||
throw new RuntimeException(String.format( | ||
"Failed to execute update request on index: %s, id: %s", | ||
indexName, | ||
id), e); | ||
} | ||
} | ||
|
||
public void update(String id, String doc) { | ||
try (RestHighLevelClient client = flintClient.createClient()) { | ||
UpdateRequest | ||
updateRequest = | ||
new UpdateRequest(indexName, id).doc(doc, XContentType.JSON) | ||
.setRefreshPolicy(WriteRequest.RefreshPolicy.WAIT_UNTIL); | ||
client.update(updateRequest, RequestOptions.DEFAULT); | ||
} catch (IOException e) { | ||
throw new RuntimeException(String.format( | ||
"Failed to execute update request on index: %s, id: %s", | ||
indexName, | ||
id), e); | ||
} | ||
} | ||
|
||
public void updateIf(String id, String doc, long seqNo, long primaryTerm) { | ||
try (RestHighLevelClient client = flintClient.createClient()) { | ||
UpdateRequest | ||
updateRequest = | ||
new UpdateRequest(indexName, id).doc(doc, XContentType.JSON) | ||
.setRefreshPolicy(WriteRequest.RefreshPolicy.WAIT_UNTIL) | ||
.setIfSeqNo(seqNo) | ||
.setIfPrimaryTerm(primaryTerm); | ||
client.update(updateRequest, RequestOptions.DEFAULT); | ||
} catch (IOException e) { | ||
throw new RuntimeException(String.format( | ||
"Failed to execute update request on index: %s, id: %s", | ||
indexName, | ||
id), e); | ||
} | ||
} | ||
} |
70 changes: 70 additions & 0 deletions
70
flint-spark-integration/src/main/scala/org/opensearch/flint/app/FlintCommand.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.flint.app | ||
|
||
import org.json4s.{Formats, NoTypeHints} | ||
import org.json4s.JsonAST.JString | ||
import org.json4s.native.JsonMethods.parse | ||
import org.json4s.native.Serialization | ||
|
||
class FlintCommand( | ||
var state: String, | ||
val query: String, | ||
// statementId is the statement type doc id | ||
val statementId: String, | ||
val queryId: String, | ||
val submitTime: Long, | ||
var error: Option[String] = None) { | ||
def running(): Unit = { | ||
state = "running" | ||
} | ||
|
||
def complete(): Unit = { | ||
state = "success" | ||
} | ||
|
||
def fail(): Unit = { | ||
state = "failed" | ||
} | ||
|
||
def isRunning(): Boolean = { | ||
state == "running" | ||
} | ||
|
||
def isComplete(): Boolean = { | ||
state == "success" | ||
} | ||
|
||
def isFailed(): Boolean = { | ||
state == "failed" | ||
} | ||
} | ||
|
||
object FlintCommand { | ||
|
||
implicit val formats: Formats = Serialization.formats(NoTypeHints) | ||
|
||
def deserialize(command: String): FlintCommand = { | ||
val meta = parse(command) | ||
val state = (meta \ "state").extract[String] | ||
val query = (meta \ "query").extract[String] | ||
val statementId = (meta \ "statementId").extract[String] | ||
val queryId = (meta \ "queryId").extract[String] | ||
val submitTime = (meta \ "submitTime").extract[Long] | ||
val maybeError: Option[String] = (meta \ "error") match { | ||
case JString(str) => Some(str) | ||
case _ => None | ||
} | ||
|
||
new FlintCommand(state, query, statementId, queryId, submitTime, maybeError) | ||
} | ||
|
||
def serialize(flintCommand: FlintCommand): String = { | ||
// we only need to modify state and error | ||
Serialization.write( | ||
Map("state" -> flintCommand.state, "error" -> flintCommand.error.getOrElse(""))) | ||
} | ||
} |
55 changes: 55 additions & 0 deletions
55
flint-spark-integration/src/main/scala/org/opensearch/flint/app/FlintInstance.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.flint.app | ||
|
||
import org.json4s.{Formats, NoTypeHints} | ||
import org.json4s.JsonAST.JString | ||
import org.json4s.native.JsonMethods.parse | ||
import org.json4s.native.Serialization | ||
import org.opensearch.index.seqno.SequenceNumbers | ||
|
||
// lastUpdateTime is added to FlintInstance to track the last update time of the instance. Its unit is millisecond. | ||
class FlintInstance( | ||
val applicationId: String, | ||
val jobId: String, | ||
// sessionId is the session type doc id | ||
val sessionId: String, | ||
val state: String, | ||
val lastUpdateTime: Long, | ||
val error: Option[String] = None) {} | ||
|
||
object FlintInstance { | ||
|
||
implicit val formats: Formats = Serialization.formats(NoTypeHints) | ||
|
||
def deserialize(job: String): FlintInstance = { | ||
val meta = parse(job) | ||
val applicationId = (meta \ "applicationId").extract[String] | ||
val state = (meta \ "state").extract[String] | ||
val jobId = (meta \ "jobId").extract[String] | ||
val sessionId = (meta \ "sessionId").extract[String] | ||
val lastUpdateTime = (meta \ "lastUpdateTime").extract[Long] | ||
val maybeError: Option[String] = (meta \ "error") match { | ||
case JString(str) => Some(str) | ||
case _ => None | ||
} | ||
|
||
new FlintInstance(applicationId, jobId, sessionId, state, lastUpdateTime, maybeError) | ||
} | ||
|
||
def serialize(job: FlintInstance): String = { | ||
Serialization.write( | ||
Map( | ||
"type" -> "session", | ||
"sessionId" -> job.sessionId, | ||
"error" -> job.error.getOrElse(""), | ||
"applicationId" -> job.applicationId, | ||
"jobId" -> job.jobId, | ||
"state" -> job.state, | ||
// update last update time | ||
"lastUpdateTime" -> System.currentTimeMillis())) | ||
} | ||
} |
Oops, something went wrong.