locals {
  # Job-specific arguments for the transactions cleaning job. They are merged
  # over var.default_arguments where the job is declared.
  transactions_cleaning_arguments = {
    "--END_DATE" = "2024-12-31"
    # Day is zero-padded so START_DATE uses the same YYYY-MM-DD shape as
    # END_DATE and compares correctly even if the script treats it as text.
    "--START_DATE"     = "2024-10-01"
    "--NETWORK_PREFIX" = "all"
  }

  # Shared argument for jobs that set the Glue data lake format to Iceberg.
  iceberg_argument = {
    "--datalake-formats" = "iceberg"
  }

  # Extra jar and Python requirements used by the recordio conversion job.
  # NOTE(review): the bucket name is hard-coded here while the job resources
  # build script paths from var.glue_script_bucket - confirm whether these
  # should use the variable as well.
  converting_to_recordio_arguments = {
    "--extra-jars"                      = "s3://bdp-glue-scripts/sagemaker-spark_2.12-spark_3.3.0-1.4.6.dev0.jar"
    "--python-modules-installer-option" = "-r"
    "--additional-python-modules"       = "s3://bdp-glue-scripts/requirements.txt"
  }

  # Written as a string so every value in the merged argument map has the
  # same type; Glue job arguments are passed as strings.
  anomaly_classification_arguments = {
    "--QUANTILE" = "0.99"
  }
}
1323
1424resource "aws_glue_job" "transactions_cleaning" {
@@ -20,10 +30,10 @@ resource "aws_glue_job" "transactions_cleaning" {
2030 python_version = " 3"
2131 }
2232
23- worker_type = " G.1X "
33+ worker_type = " G.2X "
2434 number_of_workers = 10
2535 glue_version = " 5.0"
26- default_arguments = local. transactions_cleaning_final_arguments
36+ default_arguments = merge (var . default_arguments , local. iceberg_argument , local . transactions_cleaning_arguments )
2737 timeout = 120
2838}
2939
@@ -37,10 +47,10 @@ resource "aws_glue_job" "wallets_aggregations" {
3747 python_version = " 3"
3848 }
3949
40- worker_type = " G.1X "
50+ worker_type = " G.2X "
4151 number_of_workers = 10
4252 glue_version = " 5.0"
43- default_arguments = var. default_arguments
53+ default_arguments = merge ( var. default_arguments , local . iceberg_argument )
4454 timeout = 120
4555}
4656
@@ -53,9 +63,105 @@ resource "aws_glue_job" "feature_scaling" {
5363 python_version = " 3"
5464 }
5565
66+ worker_type = " G.2X"
67+ number_of_workers = 10
68+ glue_version = " 5.0"
69+ default_arguments = merge (var. default_arguments , local. iceberg_argument )
70+ timeout = 120
71+ }
72+
# Glue ETL job that runs spearman.py from the Glue script bucket.
resource "aws_glue_job" "spearman_feature_selection" {
  name     = "Spearman feature selection"
  role_arn = var.glue_role_arn

  command {
    name            = "glueetl"
    script_location = "s3://${var.glue_script_bucket}/spearman.py"
    python_version  = "3"
  }

  worker_type       = "G.2X"
  number_of_workers = 10
  glue_version      = "5.0"
  default_arguments = var.default_arguments
  timeout           = 300 # minutes
}
88+
# Glue ETL job that runs convert_features_to_csv.py from the Glue script bucket.
resource "aws_glue_job" "convert_parquet_to_csv" {
  name     = "Convert parquet to CSV"
  role_arn = var.glue_role_arn

  command {
    name            = "glueetl"
    script_location = "s3://${var.glue_script_bucket}/convert_features_to_csv.py"
    python_version  = "3"
  }

  worker_type       = "G.1X"
  number_of_workers = 10
  glue_version      = "5.0"
  default_arguments = var.default_arguments
  timeout           = 480 # minutes
}
104+
# Glue ETL job that runs convert_features_to_recordio.py from the Glue script
# bucket.
resource "aws_glue_job" "convert_features_to_recordio" {
  name     = "Convert features to recordio"
  role_arn = var.glue_role_arn

  command {
    name            = "glueetl"
    script_location = "s3://${var.glue_script_bucket}/convert_features_to_recordio.py"
    python_version  = "3"
  }

  worker_type       = "G.2X"
  number_of_workers = 10
  glue_version      = "5.0"

  # Later maps win in merge(), so the recordio-specific arguments override
  # any same-named keys in var.default_arguments.
  default_arguments = merge(
    var.default_arguments,
    local.converting_to_recordio_arguments,
  )

  timeout = 180 # minutes
}
120+
# Glue ETL job that runs preprocessing_for_inference.py from the Glue script
# bucket.
resource "aws_glue_job" "preprocessing_with_string_columns" {
  # Spelling corrected from "Preprocesssing". Anything that starts this job by
  # its literal name must use this spelling.
  name     = "Preprocessing with string columns"
  role_arn = var.glue_role_arn

  command {
    name            = "glueetl"
    script_location = "s3://${var.glue_script_bucket}/preprocessing_for_inference.py"
    python_version  = "3"
  }

  worker_type       = "G.1X"
  number_of_workers = 10
  glue_version      = "5.0"
  default_arguments = var.default_arguments
  timeout           = 120 # minutes
}
136+
# Glue ETL job that runs convert_features_to_csv_inference.py from the Glue
# script bucket.
resource "aws_glue_job" "convert_parquet_to_csv_for_visualisation" {
  name     = "Convert parquet to csv for visualization"
  role_arn = var.glue_role_arn

  command {
    name            = "glueetl"
    script_location = "s3://${var.glue_script_bucket}/convert_features_to_csv_inference.py"
    python_version  = "3"
  }

  worker_type       = "G.1X"
  number_of_workers = 10
  glue_version      = "5.0"
  default_arguments = var.default_arguments
  timeout           = 120 # minutes
}
152+
# Glue ETL job that runs detect_anomaly.py from the Glue script bucket.
resource "aws_glue_job" "anomaly_classification" {
  name     = "Anomaly Classification"
  role_arn = var.glue_role_arn

  command {
    name            = "glueetl"
    script_location = "s3://${var.glue_script_bucket}/detect_anomaly.py"
    python_version  = "3"
  }

  worker_type       = "G.1X"
  number_of_workers = 10
  glue_version      = "5.0"

  # Later maps win in merge(), so the job-specific arguments override any
  # same-named keys in var.default_arguments.
  default_arguments = merge(
    var.default_arguments,
    local.anomaly_classification_arguments,
  )

  timeout = 120 # minutes
}
0 commit comments