-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy path__init__.py
172 lines (147 loc) · 5.48 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# Standard for all skills:
# - Author: Rodrigo Souza - https://www.linkedin.com/in/rodrigossz/
# - This code is an Azure Cognitive Search Python Custom Skill.
# - The output is the "text" element within the "data" section of the json file.
# - For production environments add all best practices, logging, and error management that you need.
# - Letter case is not changed.
# - All JSON files are returned with the original accents. For that, we use ensure_ascii=False.
# - You can find other Python Custom Skills here: https://github.com/Rodrigossz/Python-Custom-Skills-Toolkit
#
# Specific comments
# - This code removes duplicates from a list. It is case sensitive: "Flamengo" is different from "FLAMENGO" and both will be inserted.
# - This custom skill takes the input (for example, a list produced by key phrase or entity extraction) and loads EACH ELEMENT as a separate document in a Cosmos DB container.
# - Change the code as you want: You can group the elements and insert one document per Cognitive Search document. That's a very good idea, BTW.
# - This code works great with CosmosDb Emulator! Check it out! https://docs.microsoft.com/en-us/azure/cosmos-db/local-emulator#installation
import logging
import azure.functions as func
import json
from azure.cosmos import exceptions, CosmosClient, PartitionKey
import uuid
def main(req: func.HttpRequest) -> func.HttpResponse:
    """HTTP entry point of the custom skill.

    Parses the request body as JSON, checks that it is an object with a
    "values" list, and hands it to compose_response.

    Args:
        req: The incoming HTTP request.

    Returns:
        A 200 response with the JSON produced by compose_response, or a
        400 "Invalid body" response when the body is not valid JSON or does
        not have the expected {"values": [...]} shape.
    """
    logging.info('Python HTTP trigger function processed a request.')

    try:
        payload = req.get_json()
    except ValueError:
        return func.HttpResponse(
            "Invalid body",
            status_code=400
        )

    # Syntactically valid JSON such as `null`, `[]` or `{}` would otherwise
    # blow up inside compose_response and surface as an HTTP 500.
    if not isinstance(payload, dict) or not isinstance(payload.get('values'), list):
        return func.HttpResponse(
            "Invalid body",
            status_code=400
        )

    # compose_response expects the request as a JSON string.
    result = compose_response(json.dumps(payload))
    return func.HttpResponse(result, mimetype="application/json")
def compose_response(json_data):
    """Run every input record through transform_value and serialize the results.

    Args:
        json_data: JSON string containing an object with a "values" list.

    Returns:
        A JSON string of the form {"values": [...]} holding, in input order,
        every record for which transform_value returned something other than
        None. Non-ASCII characters are kept as-is (ensure_ascii=False).

    Raises:
        KeyError: If the decoded object has no "values" entry.
        json.JSONDecodeError: If json_data is not valid JSON.
    """
    values = json.loads(json_data)['values']

    # transform_value returns None for records that must be dropped.
    transformed = (transform_value(value) for value in values)
    results = {"values": [record for record in transformed if record is not None]}

    # Keeping the original accentuation with ensure_ascii=False
    return json.dumps(results, ensure_ascii=False)
## Perform an operation on a record
def transform_value(value):
    """Store the distinct strings of one skill record as Cosmos DB documents.

    Args:
        value: One element of the request's "values" list. It is expected to
            carry a "recordId" and a "data" object whose "text" entry is the
            list of strings to store (a single string is treated as a
            one-element list).

    Returns:
        None when the record has no "recordId" (it cannot be reported back).
        Otherwise a dict with the "recordId" and either
        "data": {"text": "OK"} on success, or an "errors" list describing
        why the record was rejected or could not be stored.
    """
    # A missing key raises KeyError (not AssertionError); a non-mapping record
    # raises TypeError. Either way the record is skipped.
    try:
        recordId = value['recordId']
    except (KeyError, TypeError):
        return None

    # Validate the inputs with explicit checks: `assert` statements are
    # removed when Python runs with -O, which would disable the validation.
    if 'data' not in value:
        problem = "'data' field is required."
    elif not isinstance(value['data'], dict) or 'text' not in value['data']:
        problem = "'text' field is required in 'data' object."
    else:
        problem = None

    if problem is not None:
        return {
            "recordId": recordId,
            "data": {},
            "errors": [{"message": "Error:" + problem}]
        }

    try:
        # Getting the items from values/data/text
        texts = value['data']['text']
        if isinstance(texts, str):
            # Iterating a bare string would create one document per character.
            texts = [texts]

        # Removing duplicates while keeping first-seen order (case sensitive).
        unique_texts = list(dict.fromkeys(texts))

        # Initialize the Cosmos client.
        # Add your own data or use a better method to get this information.
        endpoint = "your-CosmosDb-URI"
        key = 'your-CosmosDb-key'
        client = CosmosClient(endpoint, key)

        # Create the database if needed. Use the name that you want.
        database_name = 'MyCustomSkillData'
        database = client.create_database_if_not_exists(id=database_name)

        # Create the container if needed.
        # Customize as you want: person names, key phrases, etc.
        # Change the partition key as you want/need.
        container_name = 'Organizations'
        container = database.create_container_if_not_exists(
            id=container_name,
            partition_key=PartitionKey(path="/name"),
            offer_throughput=400
        )

        # Insert one document per distinct string. Change as you need!
        for item in unique_texts:
            container.create_item(body={
                'id': str(uuid.uuid4()),
                'name': item
            })
    except Exception:
        # Record the real cause in the function log; the caller only gets a
        # generic per-record error so the remaining records are still processed.
        logging.exception("Could not complete operation for record %s", recordId)
        return {
            "recordId": recordId,
            "errors": [{"message": "Could not complete operation for record."}]
        }

    return {
        "recordId": recordId,
        "data": {
            "text": "OK"
        }
    }
# Sample request for manually testing the skill. It covers a list with
# duplicates, a list holding an empty string, and a list whose entries differ
# only by letter case.
myInput = {
    "values": [
        {
            "recordId": "0",
            "data": {
                "text": ["FLAMENGO", "VASCO", "FLAMENGO", "FLUMINENSE", "FLAMENGO"]
            }
        },
        {
            "recordId": "1",
            "data": {
                "text": [""]
            }
        },
        {
            "recordId": "2",
            "data": {
                "text": ["FLAMENGO", "Flamengo", "flamengo", "FLAMENGO"]
            }
        }
    ]
}
inputTest = json.dumps(myInput)

# Run the smoke test only when this file is executed directly. Without the
# guard the sample data would be sent to Cosmos DB every time the module is
# imported, i.e. on every start of the Function worker.
if __name__ == "__main__":
    print(compose_response(inputTest))