In this example, we’ll learn how to read an XML file using Spring Batch and write its data to MongoDB.
pom.xml
<dependencies>
<!-- Dependencies for the XML-to-MongoDB batch example.
     Entries without a <version> are presumably managed by a Spring Boot parent/BOM
     that is not shown in this fragment; TODO confirm. -->
<!-- Spring Batch: job/step infrastructure plus the StaxEventItemReader and
     MongoItemWriter classes referenced in job-report.xml. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-batch</artifactId>
</dependency>
<!-- Spring Data MongoDB: supplies the MongoTemplate and mongo: namespace used in database.xml. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-mongodb</artifactId>
</dependency>
<!-- Spring OXM: supplies the XStreamMarshaller used as the reader's unmarshaller. -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-oxm</artifactId>
</dependency>
<!-- XStream: Converter API implemented by ReportConverter and the type-permission class. -->
<dependency>
<groupId>com.thoughtworks.xstream</groupId>
<artifactId>xstream</artifactId>
<version>1.4.20</version>
</dependency>
<!-- NOTE(review): none of the snippets shown reference JAXB directly; presumably these are
     here because JAXB is no longer bundled with newer JDKs. Confirm they are needed.
     jaxb-core is 2.3.0.1 while jaxb-api and jaxb-impl are 2.3.1; verify the mix is intended. -->
<dependency>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-core</artifactId>
<version>2.3.0.1</version>
</dependency>
<dependency>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
<version>2.3.1</version>
</dependency>
<dependency>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-impl</artifactId>
<version>2.3.1</version>
</dependency>
<!-- Lombok: generates the accessors, constructors and builder of Report from its annotations.
     Marked optional so it is not passed on to projects depending on this one. -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<!-- Test-scope dependencies only; they are not part of the runtime classpath. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<!-- NOTE(review): no reactive code is visible in this example; confirm reactor-test is needed. -->
<dependency>
<groupId>io.projectreactor</groupId>
<artifactId>reactor-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.batch</groupId>
<artifactId>spring-batch-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
Report.java
/**
 * One report record: the item type that is read from the XML input and written to MongoDB.
 *
 * <p>Lombok generates the getters/setters, {@code equals}/{@code hashCode}/{@code toString},
 * both constructors and a builder. The declaration order of the fields determines the
 * parameter order of the all-args constructor and the order shown by {@code toString()},
 * so do not reorder them casually.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
@Builder
public class Report {
/** Record identifier; ReportConverter fills it from the {@code id} attribute of the element. */
private Integer id;
/** Report date; ReportConverter parses it with the pattern {@code yyyy-MM-dd}. */
private LocalDate date;
/** Value of the {@code <impression>} child element. */
private Long impression;
/** Value of the {@code <clicks>} child element. */
private Integer clicks;
/** Value of the {@code <earning>} child element, kept as an exact decimal. */
private BigDecimal earning;
}
ReportConverter.java
package com.mkyong.converter;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mkyong.model.Report;
import com.thoughtworks.xstream.converters.Converter;
import com.thoughtworks.xstream.converters.MarshallingContext;
import com.thoughtworks.xstream.converters.UnmarshallingContext;
import com.thoughtworks.xstream.io.HierarchicalStreamReader;
import com.thoughtworks.xstream.io.HierarchicalStreamWriter;
// See the XStream converter tutorial: https://x-stream.github.io/converter-tutorial.html
/**
 * XStream converter that turns one {@code <record>} element into a {@link Report}.
 *
 * <p>The {@code id} attribute of the element becomes {@link Report#getId()}; the child
 * elements {@code impression}, {@code clicks}, {@code earning} and {@code date} are matched
 * by name, so their order in the document does not matter. Only unmarshalling is
 * implemented.
 */
public class ReportConverter implements Converter {

    private static final Logger logger = LoggerFactory.getLogger(ReportConverter.class);
    private static final DateTimeFormatter DT_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd");

    /**
     * Tells XStream that this converter handles {@link Report} and nothing else.
     *
     * @param type the class XStream wants to convert; may be {@code null}
     * @return {@code true} only for {@code Report.class}
     */
    @Override
    @SuppressWarnings("rawtypes") // the raw parameter type is dictated by the Converter interface
    public boolean canConvert(Class type) {
        // Constant-first comparison stays safe if XStream ever passes null.
        return Report.class.equals(type);
    }

    /**
     * Not implemented: nothing is written for a {@link Report}. The converter is only used
     * for reading in this example.
     */
    @Override
    public void marshal(Object source, HierarchicalStreamWriter writer, MarshallingContext context) {
        // Intentionally left empty; see the method comment.
    }

    /**
     * Builds a {@link Report} from the element the reader is currently positioned on.
     *
     * <p>Every {@code moveDown()} is paired with a {@code moveUp()}, so the reader is back on
     * the record element when this method returns. Child elements with unknown names are
     * skipped.
     *
     * @param reader  reader positioned on the record element
     * @param context unmarshalling context (unused)
     * @return the populated report; its date stays {@code null} if the date text is not
     *         a valid {@code yyyy-MM-dd} value
     * @throws NumberFormatException if the id attribute or a numeric child is not a valid number
     */
    @Override
    public Object unmarshal(HierarchicalStreamReader reader, UnmarshallingContext context) {
        Report report = new Report();
        report.setId(Integer.valueOf(reader.getAttribute("id")));

        while (reader.hasMoreChildren()) {
            reader.moveDown();
            String nodeName = reader.getNodeName();
            String value = reader.getValue();
            switch (nodeName) {
                case "impression":
                    report.setImpression(Long.parseLong(value));
                    break;
                case "clicks":
                    report.setClicks(Integer.parseInt(value));
                    break;
                case "earning":
                    report.setEarning(new BigDecimal(value));
                    break;
                case "date":
                    applyDate(report, value);
                    break;
                default:
                    // Unknown child element: ignore it rather than mis-assign its value.
                    break;
            }
            reader.moveUp();
        }

        logger.info("ReportConverter | unmarshal {}", report);
        return report;
    }

    /** Sets the report date from {@code value}; logs and leaves the date unset when it is malformed. */
    private static void applyDate(Report report, String value) {
        try {
            report.setDate(LocalDate.parse(value, DT_FORMATTER));
        } catch (DateTimeParseException e) {
            // Best effort, as before: a bad date must not fail the whole record.
            logger.error("Exception while parsing date {}", value, e);
        }
    }
}
database.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- MongoDB wiring for the batch job: one client, one database factory, one template. -->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:context="http://www.springframework.org/schema/context"
       xmlns:mongo="http://www.springframework.org/schema/data/mongo"
       xsi:schemaLocation="http://www.springframework.org/schema/context
           http://www.springframework.org/schema/context/spring-context.xsd
           http://www.springframework.org/schema/data/mongo http://www.springframework.org/schema/data/mongo/spring-mongo.xsd
           http://www.springframework.org/schema/beans
           http://www.springframework.org/schema/beans/spring-beans.xsd">

    <!-- The single MongoClient used by this context. -->
    <mongo:mongo-client id="mongoClient" host="localhost" port="27017" />

    <!-- Bind the factory to the client declared above. Without mongo-client-ref the factory
         creates its own default client, so the host/port configured above would be ignored
         and a second connection pool would be opened. -->
    <mongo:db-factory id="mongoDbFactory" dbname="test" mongo-client-ref="mongoClient" />

    <!-- Template handed to the MongoItemWriter in job-report.xml. -->
    <bean id="mongoTemplate" class="org.springframework.data.mongodb.core.MongoTemplate">
        <constructor-arg name="mongoDbFactory" ref="mongoDbFactory" />
    </bean>
</beans>
context.xml
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="
http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-3.2.xsd">
<!-- Shared batch infrastructure: job repository, transaction manager and job launcher. -->
<!-- Pulls in the MongoDB beans (mongoTemplate) defined in database.xml. -->
<import resource="classpath:database.xml"/>
<!-- Job metadata is kept in memory only, so nothing about past runs survives a restart.
     NOTE(review): MapJobRepositoryFactoryBean is deprecated in recent Spring Batch 4.x
     releases and removed in 5.x; confirm before upgrading. -->
<bean id="jobRepository" class="org.springframework.batch.core.repository.support.MapJobRepositoryFactoryBean">
<property name="transactionManager" ref="transactionManager"/>
</bean>
<!-- Transaction manager without a backing resource, paired with the in-memory repository. -->
<bean id="transactionManager" class="org.springframework.batch.support.transaction.ResourcelessTransactionManager"/>
<!-- Launcher looked up by name ("jobLauncher") from the application class. No task executor
     is set here. -->
<bean id="jobLauncher" class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
<property name="jobRepository" ref="jobRepository"/>
</bean>
</beans>
job-report.xml
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:batch="http://www.springframework.org/schema/batch" xmlns:task="http://www.springframework.org/schema/task"
xmlns:util="http://www.springframework.org/schema/util" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.springframework.org/schema/batch
http://www.springframework.org/schema/batch/spring-batch-2.2.xsd
http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-3.2.xsd
http://www.springframework.org/schema/util
http://www.springframework.org/schema/util/spring-util-3.2.xsd">
<!-- Job definition: read <record> elements from an XML file and write each one to MongoDB.
     NOTE(review): the task namespace is declared above but not used in this file. -->
<!-- Brings in jobRepository, transactionManager and jobLauncher (and, through it, mongoTemplate). -->
<import resource="classpath:context.xml"/>
<!-- Single-step job. The chunk has a reader and a writer but no processor, and a
     commit-interval of 1, i.e. items are committed one at a time. -->
<batch:job id="reportJob">
<batch:step id="step1">
<batch:tasklet>
<batch:chunk reader="xmlItemReader" writer="mongodbItemWriter"
commit-interval="1">
</batch:chunk>
</batch:tasklet>
</batch:step>
</batch:job>
<!-- Writer: stores each item in the "report" collection through mongoTemplate. -->
<bean id="mongodbItemWriter" class="org.springframework.batch.item.data.MongoItemWriter">
<property name="template" ref="mongoTemplate" />
<property name="collection" value="report" />
</bean>
<!-- Reader: each <record> element of classpath:xml/report.xml is one fragment, handed to
     the unmarshaller below. -->
<bean id="xmlItemReader" class="org.springframework.batch.item.xml.StaxEventItemReader">
<property name="fragmentRootElementName" value="record" />
<property name="resource" value="classpath:xml/report.xml" />
<property name="unmarshaller" ref="reportUnmarshaller" />
</bean>
<!-- XStream type permission naming the Report class explicitly. -->
<bean id="typePermission" class="com.thoughtworks.xstream.security.ExplicitTypePermission">
<constructor-arg value="com.mkyong.model.Report"/> <!-- string to array conversion automatically done by spring here -->
</bean>
<!-- Unmarshaller: aliases the element name "record" to Report and registers the custom
     ReportConverter that does the actual field mapping. -->
<bean id="reportUnmarshaller" class="org.springframework.oxm.xstream.XStreamMarshaller">
<property name="typePermissions" ref="typePermission"/>
<property name="aliases">
<util:map id="aliases">
<entry key="record" value="com.mkyong.model.Report" />
</util:map>
</property>
<property name="converters">
<array>
<ref bean="reportConverter" />
</array>
</property>
</bean>
<bean id="reportConverter" class="com.mkyong.converter.ReportConverter" />
</beans>
SpringBatchMongoDBToXMLApp.java
package com.mkyong;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
/**
 * Spring Boot entry point that launches the XML-defined batch job {@code reportJob}.
 *
 * <p>After Boot has started, {@link #run(String...)} loads {@code job-report.xml} into its
 * own XML application context, runs the job once with empty parameters and then closes
 * that context again.
 */
@SpringBootApplication(exclude = {DataSourceAutoConfiguration.class})
public class SpringBatchMongoDBToXMLApp implements CommandLineRunner {

    private static final Logger LOGGER = LoggerFactory.getLogger(SpringBatchMongoDBToXMLApp.class);

    /**
     * Starts the Spring Boot application.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(SpringBatchMongoDBToXMLApp.class, args);
    }

    /**
     * Loads the job definition and runs {@code reportJob} once.
     *
     * <p>A failure while launching the job is logged and not rethrown; a failure while
     * loading the XML context or looking up the beans propagates to the caller.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the XML context cannot be created or a bean lookup fails
     */
    @Override
    public void run(String... args) throws Exception {
        // try-with-resources closes the XML context, and the beans it owns, when the job is done.
        try (ClassPathXmlApplicationContext context = new ClassPathXmlApplicationContext("job-report.xml")) {
            JobLauncher jobLauncher = context.getBean("jobLauncher", JobLauncher.class);
            Job job = context.getBean("reportJob", Job.class);
            try {
                JobExecution execution = jobLauncher.run(job, new JobParameters());
                LOGGER.debug("Exit Status : {}", execution.getStatus());
            } catch (Exception e) {
                // Top-level boundary: report through the logger, keeping the full cause.
                LOGGER.error("Failed to run job reportJob", e);
            }
        }
        LOGGER.debug("Done !");
    }
}
report.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Sample input for the job: every <record> becomes one Report. The id is an attribute;
     the date uses the yyyy-MM-dd pattern expected by ReportConverter. -->
<report>
<record id="1">
<impression>213100</impression>
<clicks>980</clicks>
<earning>1000</earning>
<date>2022-07-20</date>
</record>
<record id="2">
<impression>213100</impression>
<clicks>478</clicks>
<earning>1200</earning>
<date>2024-07-20</date>
</record>
</report>
Output Logs —
. ____ _ __ _ _
/\\ / ___'_ __ _ _(_)_ __ __ _ \ \ \ \
( ( )\___ | '_ | '_| | '_ \/ _` | \ \ \ \
\\/ ___)| |_)| | | | | || (_| | ) ) ) )
' |____| .__|_| |_|_| |_\__, | / / / /
=========|_|==============|___/=/_/_/_/
:: Spring Boot :: (v2.7.1)
2024-06-05 21:12:16.628 INFO 35695 --- [ main] com.mkyong.SpringBatchMongoDBToXMLApp : Starting SpringBatchMongoDBToXMLApp using Java 17.0.1 on Prateeks-MacBook-Pro.local with PID 35695 (/Users/prats/Documents/Prateek/spring-batch-all/SpringBatchExample/target/classes started by prateekashtikar in /Users/prats/Documents/Prateek/spring-batch-all/SpringBatchExample)
2024-06-05 21:12:16.633 INFO 35695 --- [ main] com.mkyong.SpringBatchMongoDBToXMLApp : No active profile set, falling back to 1 default profile: "default"
2024-06-05 21:12:17.274 INFO 35695 --- [ main] .s.d.r.c.RepositoryConfigurationDelegate : Bootstrapping Spring Data MongoDB repositories in DEFAULT mode.
2024-06-05 21:12:17.287 INFO 35695 --- [ main] .s.d.r.c.RepositoryConfigurationDelegate : Finished Spring Data repository scanning in 9 ms. Found 0 MongoDB repository interfaces.
2024-06-05 21:12:17.668 INFO 35695 --- [ main] org.mongodb.driver.client : MongoClient with metadata {"driver": {"name": "mongo-java-driver|sync|spring-boot", "version": "4.6.1"}, "os": {"type": "Darwin", "name": "Mac OS X", "architecture": "x86_64", "version": "11.3"}, "platform": "Java/Eclipse Adoptium/17.0.1+12"} created with settings MongoClientSettings{readPreference=primary, writeConcern=WriteConcern{w=null, wTimeout=null ms, journal=null}, retryWrites=true, retryReads=true, readConcern=ReadConcern{level=null}, credential=null, streamFactoryFactory=null, commandListeners=[], codecRegistry=ProvidersCodecRegistry{codecProviders=[ValueCodecProvider{}, BsonValueCodecProvider{}, DBRefCodecProvider{}, DBObjectCodecProvider{}, DocumentCodecProvider{}, IterableCodecProvider{}, MapCodecProvider{}, GeoJsonCodecProvider{}, GridFSFileCodecProvider{}, Jsr310CodecProvider{}, JsonObjectCodecProvider{}, BsonCodecProvider{}, EnumCodecProvider{}, com.mongodb.Jep395RecordCodecProvider@5d5160e6]}, clusterSettings={hosts=[localhost:27017], srvServiceName=mongodb, mode=SINGLE, requiredClusterType=UNKNOWN, requiredReplicaSetName='null', serverSelector='null', clusterListeners='[]', serverSelectionTimeout='30000 ms', localThreshold='30000 ms'}, socketSettings=SocketSettings{connectTimeoutMS=10000, readTimeoutMS=0, receiveBufferSize=0, sendBufferSize=0}, heartbeatSocketSettings=SocketSettings{connectTimeoutMS=10000, readTimeoutMS=10000, receiveBufferSize=0, sendBufferSize=0}, connectionPoolSettings=ConnectionPoolSettings{maxSize=100, minSize=0, maxWaitTimeMS=120000, maxConnectionLifeTimeMS=0, maxConnectionIdleTimeMS=0, maintenanceInitialDelayMS=0, maintenanceFrequencyMS=60000, connectionPoolListeners=[], maxConnecting=2}, serverSettings=ServerSettings{heartbeatFrequencyMS=10000, minHeartbeatFrequencyMS=500, serverListeners='[]', serverMonitorListeners='[]'}, sslSettings=SslSettings{enabled=false, invalidHostNameAllowed=false, context=null}, applicationName='null', compressorList=[], 
uuidRepresentation=JAVA_LEGACY, serverApi=null, autoEncryptionSettings=null, contextProvider=null}
2024-06-05 21:12:17.703 INFO 35695 --- [localhost:27017] org.mongodb.driver.connection : Opened connection [connectionId{localValue:1, serverValue:310}] to localhost:27017
2024-06-05 21:12:17.703 INFO 35695 --- [localhost:27017] org.mongodb.driver.connection : Opened connection [connectionId{localValue:2, serverValue:311}] to localhost:27017
2024-06-05 21:12:17.703 INFO 35695 --- [localhost:27017] org.mongodb.driver.cluster : Monitor thread successfully connected to server with description ServerDescription{address=localhost:27017, type=STANDALONE, state=CONNECTED, ok=true, minWireVersion=0, maxWireVersion=21, maxDocumentSize=16777216, logicalSessionTimeoutMinutes=30, roundTripTimeNanos=35017750}
2024-06-05 21:12:18.056 INFO 35695 --- [ main] com.mkyong.SpringBatchMongoDBToXMLApp : Started SpringBatchMongoDBToXMLApp in 1.976 seconds (JVM running for 3.17)
2024-06-05 21:12:18.439 INFO 35695 --- [ main] org.mongodb.driver.client : MongoClient with metadata {"driver": {"name": "mongo-java-driver|sync|spring-data", "version": "4.6.1"}, "os": {"type": "Darwin", "name": "Mac OS X", "architecture": "x86_64", "version": "11.3"}, "platform": "Java/Eclipse Adoptium/17.0.1+12"} created with settings MongoClientSettings{readPreference=primary, writeConcern=WriteConcern{w=null, wTimeout=null ms, journal=null}, retryWrites=true, retryReads=true, readConcern=ReadConcern{level=null}, credential=null, streamFactoryFactory=null, commandListeners=[], codecRegistry=ProvidersCodecRegistry{codecProviders=[ValueCodecProvider{}, BsonValueCodecProvider{}, DBRefCodecProvider{}, DBObjectCodecProvider{}, DocumentCodecProvider{}, IterableCodecProvider{}, MapCodecProvider{}, GeoJsonCodecProvider{}, GridFSFileCodecProvider{}, Jsr310CodecProvider{}, JsonObjectCodecProvider{}, BsonCodecProvider{}, EnumCodecProvider{}, com.mongodb.Jep395RecordCodecProvider@5d5160e6]}, clusterSettings={hosts=[localhost:27017], srvServiceName=mongodb, mode=SINGLE, requiredClusterType=UNKNOWN, requiredReplicaSetName='null', serverSelector='null', clusterListeners='[]', serverSelectionTimeout='30000 ms', localThreshold='30000 ms'}, socketSettings=SocketSettings{connectTimeoutMS=10000, readTimeoutMS=0, receiveBufferSize=0, sendBufferSize=0}, heartbeatSocketSettings=SocketSettings{connectTimeoutMS=10000, readTimeoutMS=10000, receiveBufferSize=0, sendBufferSize=0}, connectionPoolSettings=ConnectionPoolSettings{maxSize=100, minSize=0, maxWaitTimeMS=120000, maxConnectionLifeTimeMS=0, maxConnectionIdleTimeMS=0, maintenanceInitialDelayMS=0, maintenanceFrequencyMS=60000, connectionPoolListeners=[], maxConnecting=2}, serverSettings=ServerSettings{heartbeatFrequencyMS=10000, minHeartbeatFrequencyMS=500, serverListeners='[]', serverMonitorListeners='[]'}, sslSettings=SslSettings{enabled=false, invalidHostNameAllowed=false, context=null}, applicationName='null', compressorList=[], 
uuidRepresentation=JAVA_LEGACY, serverApi=null, autoEncryptionSettings=null, contextProvider=null}
2024-06-05 21:12:18.453 INFO 35695 --- [ main] org.mongodb.driver.client : MongoClient with metadata {"driver": {"name": "mongo-java-driver|sync|spring-data", "version": "4.6.1"}, "os": {"type": "Darwin", "name": "Mac OS X", "architecture": "x86_64", "version": "11.3"}, "platform": "Java/Eclipse Adoptium/17.0.1+12"} created with settings MongoClientSettings{readPreference=primary, writeConcern=WriteConcern{w=null, wTimeout=null ms, journal=null}, retryWrites=true, retryReads=true, readConcern=ReadConcern{level=null}, credential=null, streamFactoryFactory=null, commandListeners=[], codecRegistry=ProvidersCodecRegistry{codecProviders=[ValueCodecProvider{}, BsonValueCodecProvider{}, DBRefCodecProvider{}, DBObjectCodecProvider{}, DocumentCodecProvider{}, IterableCodecProvider{}, MapCodecProvider{}, GeoJsonCodecProvider{}, GridFSFileCodecProvider{}, Jsr310CodecProvider{}, JsonObjectCodecProvider{}, BsonCodecProvider{}, EnumCodecProvider{}, com.mongodb.Jep395RecordCodecProvider@5d5160e6]}, clusterSettings={hosts=[127.0.0.1:27017], srvServiceName=mongodb, mode=SINGLE, requiredClusterType=UNKNOWN, requiredReplicaSetName='null', serverSelector='null', clusterListeners='[]', serverSelectionTimeout='30000 ms', localThreshold='30000 ms'}, socketSettings=SocketSettings{connectTimeoutMS=10000, readTimeoutMS=0, receiveBufferSize=0, sendBufferSize=0}, heartbeatSocketSettings=SocketSettings{connectTimeoutMS=10000, readTimeoutMS=10000, receiveBufferSize=0, sendBufferSize=0}, connectionPoolSettings=ConnectionPoolSettings{maxSize=100, minSize=0, maxWaitTimeMS=120000, maxConnectionLifeTimeMS=0, maxConnectionIdleTimeMS=0, maintenanceInitialDelayMS=0, maintenanceFrequencyMS=60000, connectionPoolListeners=[], maxConnecting=2}, serverSettings=ServerSettings{heartbeatFrequencyMS=10000, minHeartbeatFrequencyMS=500, serverListeners='[]', serverMonitorListeners='[]'}, sslSettings=SslSettings{enabled=false, invalidHostNameAllowed=false, context=null}, applicationName='null', compressorList=[], 
uuidRepresentation=JAVA_LEGACY, serverApi=null, autoEncryptionSettings=null, contextProvider=null}
2024-06-05 21:12:18.463 INFO 35695 --- [localhost:27017] org.mongodb.driver.connection : Opened connection [connectionId{localValue:4, serverValue:313}] to localhost:27017
2024-06-05 21:12:18.463 INFO 35695 --- [localhost:27017] org.mongodb.driver.connection : Opened connection [connectionId{localValue:3, serverValue:312}] to localhost:27017
2024-06-05 21:12:18.463 INFO 35695 --- [127.0.0.1:27017] org.mongodb.driver.connection : Opened connection [connectionId{localValue:5, serverValue:314}] to 127.0.0.1:27017
2024-06-05 21:12:18.463 INFO 35695 --- [127.0.0.1:27017] org.mongodb.driver.cluster : Monitor thread successfully connected to server with description ServerDescription{address=127.0.0.1:27017, type=STANDALONE, state=CONNECTED, ok=true, minWireVersion=0, maxWireVersion=21, maxDocumentSize=16777216, logicalSessionTimeoutMinutes=30, roundTripTimeNanos=15804084}
2024-06-05 21:12:18.463 INFO 35695 --- [localhost:27017] org.mongodb.driver.cluster : Monitor thread successfully connected to server with description ServerDescription{address=localhost:27017, type=STANDALONE, state=CONNECTED, ok=true, minWireVersion=0, maxWireVersion=21, maxDocumentSize=16777216, logicalSessionTimeoutMinutes=30, roundTripTimeNanos=23944041}
2024-06-05 21:12:18.463 INFO 35695 --- [127.0.0.1:27017] org.mongodb.driver.connection : Opened connection [connectionId{localValue:6, serverValue:315}] to 127.0.0.1:27017
2024-06-05 21:12:18.695 INFO 35695 --- [ main] o.s.b.c.l.support.SimpleJobLauncher : No TaskExecutor has been set, defaulting to synchronous executor.
2024-06-05 21:12:18.917 INFO 35695 --- [ main] o.s.b.c.l.support.SimpleJobLauncher : Job: [FlowJob: [name=reportJob]] launched with the following parameters: [{}]
2024-06-05 21:12:18.966 INFO 35695 --- [ main] o.s.batch.core.job.SimpleStepHandler : Executing step: [step1]
2024-06-05 21:12:22.600 INFO 35695 --- [ main] com.mkyong.converter.ReportConverter : ReportConverter | unmarshal Report(id=1, date=2022-07-20, impression=213100, clicks=980, earning=1000)
2024-06-05 21:12:30.466 INFO 35695 --- [ main] org.mongodb.driver.connection : Opened connection [connectionId{localValue:7, serverValue:316}] to 127.0.0.1:27017
2024-06-05 21:12:31.131 INFO 35695 --- [ main] com.mkyong.converter.ReportConverter : ReportConverter | unmarshal Report(id=2, date=2024-07-20, impression=213100, clicks=478, earning=1200)
2024-06-05 21:12:31.172 INFO 35695 --- [ main] o.s.batch.core.step.AbstractStep : Step: [step1] executed in 12s205ms
2024-06-05 21:12:31.180 INFO 35695 --- [ main] o.s.b.c.l.support.SimpleJobLauncher : Job: [FlowJob: [name=reportJob]] completed with the following parameters: [{}] and the following status: [COMPLETED] in 12s244ms
Reference —