Skip to content

Commit

Permalink
[feat] Add rpm/tpm extension proc plugin (#79)
Browse files Browse the repository at this point in the history
* Add rpm/tpm extension proc plugin

* add license header

* add extproc auto creation with httproute

* update go.mod

* address comments

* update plugin dir structure

---------

Co-authored-by: varungupta <varungupta@BYTEDANCE.COM>
  • Loading branch information
varungup90 and varungupta authored Aug 15, 2024
1 parent b42f435 commit af45221
Show file tree
Hide file tree
Showing 13 changed files with 635 additions and 2 deletions.
2 changes: 1 addition & 1 deletion docs/development/app/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ kind load docker-image aibrix/vllm:v0.1.0
2. Deploy mocked model image
```shell
kubectl apply -f deployment.yaml
kubectl port-forward svc/lora-test-mac-only 8000:8000 &
kubectl port-forward svc/llama2-70b 8000:8000 &
```

## Test python app separately
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ require (
k8s.io/code-generator v0.29.2
k8s.io/klog/v2 v2.110.1
k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00
k8s.io/utils v0.0.0-20230726121419-3b25d923346b
sigs.k8s.io/controller-runtime v0.17.3
sigs.k8s.io/gateway-api v1.0.0
)
Expand Down Expand Up @@ -70,7 +71,6 @@ require (
k8s.io/apiextensions-apiserver v0.29.2 // indirect
k8s.io/component-base v0.29.2 // indirect
k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01 // indirect
k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
Expand Down
18 changes: 18 additions & 0 deletions pkg/plugins/ratelimiter/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
## Multistage build
FROM golang:1.21 as build
ENV CGO_ENABLED=0
ENV GOOS=linux
ENV GOARCH=amd64

WORKDIR /src
COPY . .
RUN go mod download
RUN go build -o /ext_proc

## Multistage deploy
FROM gcr.io/distroless/base-debian10

WORKDIR /
COPY --from=build /ext_proc /ext_proc

ENTRYPOINT ["/ext_proc"]
15 changes: 15 additions & 0 deletions pkg/plugins/ratelimiter/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
.PHONY: build
build:
docker rmi aibrix/tpm:v0.1.0 --force
docker build -t aibrix/tpm:v0.1.0 -f Dockerfile .
kind load docker-image aibrix/tpm:v0.1.0

.PHONY: apply
apply:
kubectl apply -f deployment.yaml
kubectl apply -f plugins.yaml

.PHONY: delete
delete:
kubectl delete -f deployment.yaml
kubectl delete -f plugins.yaml
27 changes: 27 additions & 0 deletions pkg/plugins/ratelimiter/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@


# Install backing storage to persist rpm/tpm configuration
kubectl apply -f redis.yaml

# Add rpm/tpm config
kubectl exec -it redis-master-<pod-name> -- redis-cli

set aibrix:<user-name>_TPM_LIMIT 100
set aibrix:<user-name>_RPM_LIMIT 10

# Install the extension processor (ext_proc)
make build && make apply

# Test requests
```shell
curl -v http://localhost:8888/v1/chat/completions \
-H "user: varun" \
-H "model: llama2-70b" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer any_key" \
-d '{
"model": "llama2-70b",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}'
```
45 changes: 45 additions & 0 deletions pkg/plugins/ratelimiter/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
apiVersion: v1
kind: Service
metadata:
name: grpc-ext-proc
spec:
selector:
app: grpc-ext-proc
ports:
- protocol: TCP
port: 50052
targetPort: 50052
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: grpc-ext-proc
spec:
replicas: 1
selector:
matchLabels:
app: grpc-ext-proc
template:
metadata:
labels:
app: grpc-ext-proc
spec:
containers:
- name: golang-app-container
image: aibrix/tpm:v0.1.0
ports:
- containerPort: 50052
env:
- name: REDIS_HOST
value: redis-master
- name: REDIS_PORT
value: "6379"
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace

25 changes: 25 additions & 0 deletions pkg/plugins/ratelimiter/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
module github.com/aibrix/aibrix/pkg/plugins/ext_proc

go 1.21

require (
github.com/coocood/freecache v1.2.4
github.com/envoyproxy/go-control-plane v0.12.0
github.com/redis/go-redis/v9 v9.6.1
github.com/sashabaranov/go-openai v1.28.1
google.golang.org/grpc v1.65.0
k8s.io/klog v1.0.0
)

require (
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/envoyproxy/protoc-gen-validate v1.0.4 // indirect
github.com/golang/protobuf v1.5.4 // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/text v0.15.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 // indirect
google.golang.org/protobuf v1.34.1 // indirect
)
40 changes: 40 additions & 0 deletions pkg/plugins/ratelimiter/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b h1:ga8SEFjZ60pxLcmhnThWgvH2wg8376yUJmPhEH4H3kw=
github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
github.com/coocood/freecache v1.2.4 h1:UdR6Yz/X1HW4fZOuH0Z94KwG851GWOSknua5VUbb/5M=
github.com/coocood/freecache v1.2.4/go.mod h1:RBUWa/Cy+OHdfTGFEhEuE1pMCMX51Ncizj7rthiQ3vk=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
github.com/envoyproxy/go-control-plane v0.12.0 h1:4X+VP1GHd1Mhj6IB5mMeGbLCleqxjletLK6K0rbxyZI=
github.com/envoyproxy/go-control-plane v0.12.0/go.mod h1:ZBTaoJ23lqITozF0M6G4/IragXCQKCnYbmlmtHvwRG0=
github.com/envoyproxy/protoc-gen-validate v1.0.4 h1:gVPz/FMfvh57HdSJQyvBtF00j8JU4zdyUgIUNhlgg0A=
github.com/envoyproxy/protoc-gen-validate v1.0.4/go.mod h1:qys6tmnRsYrQqIhm2bvKZH4Blx/1gTIZ2UKVY1M+Yew=
github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/redis/go-redis/v9 v9.6.1 h1:HHDteefn6ZkTtY5fGUE8tj8uy85AHk6zP7CpzIAM0y4=
github.com/redis/go-redis/v9 v9.6.1/go.mod h1:0C0c6ycQsdpVNQpxb1njEQIqkx5UcsM8FJCQLgE9+RA=
github.com/sashabaranov/go-openai v1.28.1 h1:aREx6faUTeOZNMDTNGAY8B9vNmmN7qoGvDV0Ke2J1Mc=
github.com/sashabaranov/go-openai v1.28.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 h1:Zy9XzmMEflZ/MAaA7vNcoebnRAld7FsPW1EeBB7V0m8=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157/go.mod h1:EfXuqaE1J41VCDicxHzUDm+8rk+7ZdXzHV0IhO/I6s0=
google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc=
google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ=
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
k8s.io/klog v1.0.0 h1:Pt+yjF5aB1xDSVbau4VsWe+dQNzA0qv1LlXdC2dF6Q8=
k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I=
Loading

0 comments on commit af45221

Please sign in to comment.