Hello All
I have created 3 docker containers running in one network using docker images as follows :
postgres
aws glue image
oracle image
Sharing docker yml for same .
version: "2"
services:
spark-postgres:
image: postgres:latest
container_name: spark-postgres
build: ./postgresql
restart: always
hostname: spark-postgres
env_file:
- ./env/postgresdb-env-vars.env
ports:
- "5432:5432"
volumes:
- ./data/territoryhub-replication/postgresql:/var/lib/postgresql/data
networks:
glue-network:
ipv4_address: 10.4.0.4
spark-oracle:
image: oracle:test
container_name: spark-oracle
build: ./oracle
restart: always
hostname: spark-oracle
env_file:
- ./env/oracledb-env-vars.env
ports:
- "1521:1521"
volumes:
- ./data/territoryhub-replication/oracle:/opt/oracle/oradata
- ./oracle/oracle-scripts:/opt/oracle/scripts/startup
networks:
glue-network:
ipv4_address: 10.4.0.5
spark-master:
image: spark-master
container_name: spark-master
build: ./spark
hostname: spark-master
depends_on :
- spark-postgres
- spark-oracle
ports:
- "8888:8888"
- "4040:4040"
env_file:
- ./env/spark-env-vars.env
command : "/home/jupyter/jupyter_start.sh"
volumes:
- ../app/territoryhub-replication:/home/jupyter/jupyter_default_dir
networks:
glue-network:
ipv4_address: 10.4.0.3
networks:
glue-network:
driver: bridge
ipam:
config:
- subnet: 10.4.0.0/16
gateway: 10.4.0.1
Also I would like to mention I was not finding oracle jdbc driver to connect to oracle database so in my glue image (spark-master) I have added jar file .
Sharing docker file of that
FROM amazon/aws-glue-libs:glue_libs_1.0.0_image_01
COPY jar/ojdbc8.jar /home/spark-2.4.3-bin-spark-2.4.3-bin-hadoop2.8/jars/ojdbc8.jar
RUN mkdir -p /root/.aws
RUN echo "[default]\nregion=us-east-1" >> /root/.aws/config
Now I am simply trying to connect to oracle database created on same network from my local which I am running as follows :
http://localhost:8888/tree?
pyspark code is
from pyspark import SparkContext
from awsglue.context import GlueContext
glueContext = GlueContext(SparkContext.getOrCreate())
inputDF = glueContext.create_dynamic_frame_from_options(connection_type = "oracle", connection_options = {"url": "jdbc:oracle:thin:@//10.4.0.5:1521:ORCLCDB", "user": 'system', "password": '<some pwd>' ,"dbtable": "<some table name>"})
inputDF.toDF().show()
I am getting below error
Invalid Oracle URL specified
An error was encountered:
An error occurred while calling o303.getDynamicFrame.
: java.sql.SQLException: Invalid Oracle URL specified
at oracle.jdbc.driver.PhysicalConnection.parseUrl(PhysicalConnection.java:1738)
at oracle.jdbc.driver.PhysicalConnection.readConnectionProperties(PhysicalConnection.java:1419)
at oracle.jdbc.driver.PhysicalConnection.<init>(PhysicalConnection.java:943)
at oracle.jdbc.driver.PhysicalConnection.<init>(PhysicalConnection.java:928)
at oracle.jdbc.driver.T4CConnection.<init>(T4CConnection.java:557)
at oracle.jdbc.driver.T4CDriverExtension.getConnection(T4CDriverExtension.java:68)
at oracle.jdbc.driver.OracleDriver.connect(OracleDriver.java:732)
at oracle.jdbc.driver.OracleDriver.connect(OracleDriver.java:648)
at com.amazonaws.services.glue.util.JDBCWrapper$$anonfun$8.apply(JDBCUtils.scala:900)
at com.amazonaws.services.glue.util.JDBCWrapper$$anonfun$8.apply(JDBCUtils.scala:896)
at com.amazonaws.services.glue.util.JDBCWrapper$$anonfun$connectWithSSLAttempt$1$$anonfun$apply$6.apply(JDBCUtils.scala:852)
at scala.Option.getOrElse(Option.scala:121)
at com.amazonaws.services.glue.util.JDBCWrapper$$anonfun$connectWithSSLAttempt$1.apply(JDBCUtils.scala:852)
at scala.Option.getOrElse(Option.scala:121)
at com.amazonaws.services.glue.util.JDBCWrapper$.connectWithSSLAttempt(JDBCUtils.scala:852)
at com.amazonaws.services.glue.util.JDBCWrapper$.connectionProperties(JDBCUtils.scala:895)
at com.amazonaws.services.glue.util.JDBCWrapper.connectionProperties$lzycompute(JDBCUtils.scala:671)
at com.amazonaws.services.glue.util.JDBCWrapper.connectionProperties(JDBCUtils.scala:671)
at com.amazonaws.services.glue.util.JDBCWrapper.tableDF(JDBCUtils.scala:797)
at com.amazonaws.services.glue.util.NoCondition$.tableDF(JDBCUtils.scala:85)
at com.amazonaws.services.glue.util.NoJDBCPartitioner$.tableDF(JDBCUtils.scala:124)
at com.amazonaws.services.glue.JDBCDataSource.getDynamicFrame(DataSource.scala:863)
at com.amazonaws.services.glue.DataSource$class.getDynamicFrame(DataSource.scala:97)
at com.amazonaws.services.glue.SparkSQLDataSource.getDynamicFrame(DataSource.scala:683)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Traceback (most recent call last):
File "/home/aws-glue-libs/awsglue.zip/awsglue/context.py", line 204, in create_dynamic_frame_from_options
return source.getFrame(**kwargs)
File "/home/aws-glue-libs/awsglue.zip/awsglue/data_source.py", line 36, in getFrame
jframe = self._jsource.getDynamicFrame()
File "/home/spark-2.4.3-bin-spark-2.4.3-bin-hadoop2.8/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1257, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "/home/spark-2.4.3-bin-spark-2.4.3-bin-hadoop2.8/python/lib/pyspark.zip/pyspark/sql/utils.py", line 63, in deco
return f(*a, **kw)
File "/home/spark-2.4.3-bin-spark-2.4.3-bin-hadoop2.8/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py", line 328, in get_return_value
format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o303.getDynamicFrame.
: java.sql.SQLException: Invalid Oracle URL specified
at oracle.jdbc.driver.PhysicalConnection.parseUrl(PhysicalConnection.java:1738)
at oracle.jdbc.driver.PhysicalConnection.readConnectionProperties(PhysicalConnection.java:1419)
at oracle.jdbc.driver.PhysicalConnection.<init>(PhysicalConnection.java:943)
at oracle.jdbc.driver.PhysicalConnection.<init>(PhysicalConnection.java:928)
at oracle.jdbc.driver.T4CConnection.<init>(T4CConnection.java:557)
at oracle.jdbc.driver.T4CDriverExtension.getConnection(T4CDriverExtension.java:68)
at oracle.jdbc.driver.OracleDriver.connect(OracleDriver.java:732)
at oracle.jdbc.driver.OracleDriver.connect(OracleDriver.java:648)
at com.amazonaws.services.glue.util.JDBCWrapper$$anonfun$8.apply(JDBCUtils.scala:900)
at com.amazonaws.services.glue.util.JDBCWrapper$$anonfun$8.apply(JDBCUtils.scala:896)
at com.amazonaws.services.glue.util.JDBCWrapper$$anonfun$connectWithSSLAttempt$1$$anonfun$apply$6.apply(JDBCUtils.scala:852)
at scala.Option.getOrElse(Option.scala:121)
at com.amazonaws.services.glue.util.JDBCWrapper$$anonfun$connectWithSSLAttempt$1.apply(JDBCUtils.scala:852)
at scala.Option.getOrElse(Option.scala:121)
at com.amazonaws.services.glue.util.JDBCWrapper$.connectWithSSLAttempt(JDBCUtils.scala:852)
at com.amazonaws.services.glue.util.JDBCWrapper$.connectionProperties(JDBCUtils.scala:895)
at com.amazonaws.services.glue.util.JDBCWrapper.connectionProperties$lzycompute(JDBCUtils.scala:671)
at com.amazonaws.services.glue.util.JDBCWrapper.connectionProperties(JDBCUtils.scala:671)
at com.amazonaws.services.glue.util.JDBCWrapper.tableDF(JDBCUtils.scala:797)
at com.amazonaws.services.glue.util.NoCondition$.tableDF(JDBCUtils.scala:85)
at com.amazonaws.services.glue.util.NoJDBCPartitioner$.tableDF(JDBCUtils.scala:124)
at com.amazonaws.services.glue.JDBCDataSource.getDynamicFrame(DataSource.scala:863)
at com.amazonaws.services.glue.DataSource$class.getDynamicFrame(DataSource.scala:97)
at com.amazonaws.services.glue.SparkSQLDataSource.getDynamicFrame(DataSource.scala:683)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)