1
+ #
2
+ # CLOUDERA APPLIED MACHINE LEARNING PROTOTYPE (AMP)
3
+ # (C) Cloudera, Inc. 2024
4
+ # All rights reserved.
5
+ #
6
+ # Applicable Open Source License: Apache 2.0
7
+ #
8
+ # NOTE: Cloudera open source products are modular software products
9
+ # made up of hundreds of individual components, each of which was
10
+ # individually copyrighted. Each Cloudera open source product is a
11
+ # collective work under U.S. Copyright Law. Your license to use the
12
+ # collective work is as provided in your written agreement with
13
+ # Cloudera. Used apart from the collective work, this file is
14
+ # licensed for your use pursuant to the open source license
15
+ # identified above.
16
+ #
17
+ # This code is provided to you pursuant a written agreement with
18
+ # (i) Cloudera, Inc. or (ii) a third-party authorized to distribute
19
+ # this code. If you do not have a written agreement with Cloudera nor
20
+ # with an authorized and properly licensed third party, you do not
21
+ # have any rights to access nor to use this code.
22
+ #
23
+ # Absent a written agreement with Cloudera, Inc. ("Cloudera") to the
24
+ # contrary, A) CLOUDERA PROVIDES THIS CODE TO YOU WITHOUT WARRANTIES OF ANY
25
+ # KIND; (B) CLOUDERA DISCLAIMS ANY AND ALL EXPRESS AND IMPLIED
26
+ # WARRANTIES WITH RESPECT TO THIS CODE, INCLUDING BUT NOT LIMITED TO
27
+ # IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY AND
28
+ # FITNESS FOR A PARTICULAR PURPOSE; (C) CLOUDERA IS NOT LIABLE TO YOU,
29
+ # AND WILL NOT DEFEND, INDEMNIFY, NOR HOLD YOU HARMLESS FOR ANY CLAIMS
30
+ # ARISING FROM OR RELATED TO THE CODE; AND (D)WITH RESPECT TO YOUR EXERCISE
31
+ # OF ANY RIGHTS GRANTED TO YOU FOR THE CODE, CLOUDERA IS NOT LIABLE FOR ANY
32
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR
33
+ # CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO, DAMAGES
34
+ # RELATED TO LOST REVENUE, LOST PROFITS, LOSS OF INCOME, LOSS OF
35
+ # BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF
36
+ # DATA.
37
+ #
38
+
39
+ #
40
+ # CLOUDERA APPLIED MACHINE LEARNING PROTOTYPE (AMP)
41
+ # (C) Cloudera, Inc. 2024
42
+ # All rights reserved.
43
+ #
44
+ # Applicable Open Source License: Apache 2.0
45
+ #
46
+ #
47
+ # This code is provided to you pursuant a written agreement with
48
+ # (i) Cloudera, Inc. or (ii) a third-party authorized to distribute
49
+ # this code. If you do not have a written agreement with Cloudera nor
50
+ # with an authorized and properly licensed third party, you do not
51
+ # have any rights to access nor to use this code.
52
+ #
53
+ # Absent a written agreement with Cloudera, Inc. ("Cloudera") to the
54
+ # contrary, A) CLOUDERA PROVIDES THIS CODE TO YOU WITHOUT WARRANTIES OF ANY
55
+ # KIND; (B) CLOUDERA DISCLAIMS ANY AND ALL EXPRESS AND IMPLIED
56
+ # WARRANTIES WITH RESPECT TO THIS CODE, INCLUDING BUT NOT LIMITED TO
57
+ # IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY AND
58
+ # FITNESS FOR A PARTICULAR PURPOSE; (C) CLOUDERA IS NOT LIABLE TO YOU,
59
+ # AND WILL NOT DEFEND, INDEMNIFY, NOR HOLD YOU HARMLESS FOR ANY CLAIMS
60
+ # ARISING FROM OR RELATED TO THE CODE; AND (D)WITH RESPECT TO YOUR EXERCISE
61
+ # OF ANY RIGHTS GRANTED TO YOU FOR THE CODE, CLOUDERA IS NOT LIABLE FOR ANY
62
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR
63
+ # CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO, DAMAGES
64
+ # RELATED TO LOST REVENUE, LOST PROFITS, LOSS OF INCOME, LOSS OF
65
+ # BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF
66
+ # DATA.
67
+ #
68
+
69
+ import os
70
+ import socket
71
+
72
def _env_or_none(name):
    """Return environment variable *name*, or None when it is unset or blank."""
    value = os.environ.get(name)
    if value is None or not value.strip():
        return None
    return value


def validate():
    """Check that the environment is configured for the selected backends.

    Reads AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION,
    S3_RAG_DOCUMENT_BUCKET and CAII_DOMAIN, prints progress to stdout and
    enforces two rules:

    1. Without CAII_DOMAIN, all three AWS variables are required (Bedrock
       is used for LLMs/embeddings); with CAII_DOMAIN, the domain must be
       resolvable.
    2. With S3_RAG_DOCUMENT_BUCKET, all three AWS variables are required.

    A variable set to an empty or whitespace-only string is treated as
    unset, so a blank placeholder can neither select a backend nor satisfy
    a credential requirement.

    Raises:
        SystemExit: with exit status 1 when either rule is violated.
    """
    print("Validating environment variables...")
    access_key_id = _env_or_none("AWS_ACCESS_KEY_ID")
    secret_key_id = _env_or_none("AWS_SECRET_ACCESS_KEY")
    default_region = _env_or_none("AWS_DEFAULT_REGION")
    document_bucket = _env_or_none("S3_RAG_DOCUMENT_BUCKET")

    caii_domain = _env_or_none("CAII_DOMAIN")

    # Both rules below need the same "are the AWS settings complete?" answer.
    aws_incomplete = (
        access_key_id is None or secret_key_id is None or default_region is None
    )

    # 1. if you don't have a caii_domain, you _must_ have an access key, secret key, and default region
    if caii_domain is None:
        if aws_incomplete:
            print("ERROR: Using Bedrock for LLMs/embeddings; missing required environment variables: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION")
            # SystemExit is raised directly: the bare exit() helper is injected
            # by the site module and is not guaranteed to exist in every run mode.
            raise SystemExit(1)
    else:
        try:
            socket.gethostbyname(caii_domain)
        except (OSError, UnicodeError):
            # OSError covers resolver failures (socket.error is an alias of it);
            # UnicodeError is raised for names that cannot be IDNA-encoded,
            # e.g. an empty or over-long label.
            print(f"ERROR: CAII domain {caii_domain} can not be resolved")
            raise SystemExit(1)
        print(f"CAII domain {caii_domain} can be resolved")

    # 2. if you have a document_bucket, you _must_ have an access key, secret key, and default region
    if document_bucket is not None and aws_incomplete:
        print("ERROR: Using S3 for document storage; missing required environment variables: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION")
        raise SystemExit(1)

    # Everything checked out; summarise the configuration that will be used.
    if caii_domain is not None:
        print("Using CAII for LLMs/embeddings; CAII_DOMAIN is set")
    else:
        print("Using Bedrock for LLMs/embeddings; AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and AWS_DEFAULT_REGION are set")

    if document_bucket is not None:
        print("Using S3 for document storage (S3_RAG_DOCUMENT_BUCKET is set)")
    else:
        print("Using the project filesystem for document storage (S3_RAG_DOCUMENT_BUCKET is not set)")
    # TODO: verify that the bucket prefix is always optional
111
+
112
# Run the checks at module level: they execute whenever this file is run or
# imported, and a failed check ends the process with exit status 1.
+ validate ()
0 commit comments