Hi,
- no, the error messages are very cryptic.
- yes, it is thrown after the job is triggered; the job then fails.
- well, we are not Spark experts, so we go with what the newly released AWS Glue Studio generates. Let me provide an example below.
Kind regards,
Ömer
Here is the generated script; I removed most of the columns:
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
## @params: [JOB_NAME]
args = getResolvedOptions(sys.argv, ['JOB_NAME'])
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
job.init(args['JOB_NAME'], args)
## @type: DataSource
## @args: [database = "glue_test", table_name = "jdbc_pulsedatalake_gluetesttable", transformation_ctx = "DataSource0"]
## @return: DataSource0
## @inputs: []
DataSource0 = glueContext.create_dynamic_frame.from_catalog(database = "glue_test", table_name = "jdbc_pulsedatalake_gluetesttable", transformation_ctx = "DataSource0")
## @type: ApplyMapping
## @args: [mappings = [("leistungszeitraumfrom", "timestamp", "leistungszeitraumfrom", "timestamp")], transformation_ctx = "Transform0"]
## @return: Transform0
## @inputs: [frame = DataSource0]
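# mapping tuples are (source column, source type, target column, target type)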
Transform0 = ApplyMapping.apply(frame = DataSource0, mappings = [("leistungszeitraumfrom", "timestamp", "leistungszeitraumfrom", "timestamp")], transformation_ctx = "Transform0")
## @type: DataSink
## @args: [database = "glue_test", format = "json", table_name = "jdbc_pulsedatalake_gluetesttable", transformation_ctx = "DataSink0"]
## @return: DataSink0
## @inputs: [frame = Transform0]
DataSink0 = glueContext.write_dynamic_frame.from_catalog(frame = Transform0, database = "glue_test", format = "json", table_name = "jdbc_pulsedatalake_gluetesttable", transformation_ctx = "DataSink0")
job.commit()
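In case it helps with debugging, here is a minimal sketch (same catalog table as above, using only standard DynamicFrame calls) to check what the source actually delivers before ApplyMapping runs, e.g. whether "leistungszeitraumfrom" really arrives as a timestamp:
DataSource0 = glueContext.create_dynamic_frame.from_catalog(database = "glue_test", table_name = "jdbc_pulsedatalake_gluetesttable", transformation_ctx = "DataSource0")
# print the schema as Glue infers it from the JDBC source
DataSource0.printSchema()
# show the first few rows as a Spark DataFrame
DataSource0.toDF().show(5)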