Spring Batch — Read from XML and write to Mongo

Prateek
5 min readDec 31, 2019

--

In this post, we will show you how to use Spring Batch to read an XML file with a StaxEventItemReader and write its data to a NoSQL store using a custom ItemWriter backed by a Spring Data MongoRepository. Here we use MongoDB.

A custom ItemReader or ItemWriter is a class in which we implement our own way of reading or writing data. In a custom reader we are required to handle the chunking logic as well. This comes in handy when the reading logic is complex and cannot be handled by the default ItemReader implementations provided by Spring.

Tools and libraries used:

Maven 3.5+
Spring Batch Starter
Spring OXM
Data Mongodb starter
xstream

Maven Dependency — Need to configure the project

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the spring-batch-mongodb demo project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Inherit from the Spring Boot starter parent; dependencies below that declare no version rely on it. -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.2.2.RELEASE</version>
<relativePath /> <!-- lookup parent from repository -->
</parent>
<groupId>com.example</groupId>
<artifactId>spring-batch-mongodb</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>spring-batch-mongodb</name>
<description>Demo project for Spring Boot</description>
<properties>
<java.version>1.8</java.version>
<!-- NOTE(review): presumably overrides the maven-jar-plugin version managed by the parent; confirm it is still needed. -->
<maven-jar-plugin.version>3.1.1</maven-jar-plugin.version>
</properties>
<dependencies>
<!-- Spring Batch: job, step, reader and writer infrastructure. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-batch</artifactId>
</dependency>
<!-- Spring OXM: provides the XStreamMarshaller used by the XML item reader. -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-oxm</artifactId>
</dependency>
<!-- Spring Data MongoDB: provides MongoRepository and the mapping annotations. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-mongodb</artifactId>
</dependency>
<!-- XStream: the version is declared explicitly here, unlike the other dependencies. -->
<!-- NOTE(review): 1.4.7 is an old release; check the XStream security advisories before reusing this version. -->
<dependency>
<groupId>com.thoughtworks.xstream</groupId>
<artifactId>xstream</artifactId>
<version>1.4.7</version>
</dependency>
<!-- Lombok: generates the constructors, builder and accessors of the Customer class. -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<!-- Test support; the JUnit vintage engine is excluded. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.junit.vintage</groupId>
<artifactId>junit-vintage-engine</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.springframework.batch</groupId>
<artifactId>spring-batch-test</artifactId>
<scope>test</scope>
</dependency>
<!-- H2 database driver, runtime scope only. -->
<!-- NOTE(review): the application class excludes DataSourceAutoConfiguration; confirm whether H2 is actually required. -->
<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
<scope>runtime</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Spring Boot Maven plugin. -->
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>

CustomerWriter — This is the custom writer we have created to write the customer data into MongoDB. A custom writer also gives us the ability to perform more complex operations.

package com.example.writer;
import java.util.List;
import org.springframework.batch.item.ItemWriter;
import org.springframework.beans.factory.annotation.Autowired;
import com.example.domain.Customer;
import com.example.repository.CustomerRepository;
/**
 * Spring Batch {@link ItemWriter} that stores each chunk of {@link Customer} items
 * through the {@link CustomerRepository}.
 */
public class CustomerWriter implements ItemWriter<Customer> {

    /** Repository the customers are saved with; populated via {@code @Autowired} field injection. */
    @Autowired
    private CustomerRepository customerRepository;

    /**
     * Saves all customers of the given chunk with a single repository call.
     *
     * @param customers the items of the current chunk
     * @throws Exception declared by the {@link ItemWriter} contract
     */
    @Override
    public void write(List<? extends Customer> customers) throws Exception {
        this.customerRepository.saveAll(customers);
    }
}

CustomerRepository — This is mongo repository which talks with database.

package com.example.repository;
import org.springframework.data.mongodb.repository.MongoRepository;
import com.example.domain.Customer;
/**
 * Spring Data MongoDB repository for {@link Customer} documents.
 *
 * <p>The second type argument is the type of the document identifier. It is {@code Long}
 * because the {@code @Id} field of {@link Customer} is declared as {@code Long}; declaring
 * it as {@code String} would give id-based methods such as {@code findById} and
 * {@code deleteById} a parameter type that does not match the entity.
 */
public interface CustomerRepository extends MongoRepository<Customer, Long> {
}

Customer — This is mongo document class which holds business data.

package com.example.domain;
import java.time.LocalDate;
import javax.xml.bind.annotation.XmlRootElement;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import org.springframework.data.mongodb.core.mapping.Field;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * MongoDB document holding the data of a single customer.
 *
 * <p>Lombok generates the accessors, the builder and both the no-argument and the
 * all-arguments constructor.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@Document
@XmlRootElement(name = "Customer")
public class Customer {

    /** Document identifier. */
    @Id
    private Long id;

    /** Given name of the customer. */
    @Field
    private String firstName;

    /** Family name of the customer. */
    @Field
    private String lastName;

    /** Date of birth; only the date part is kept. */
    @Field
    private LocalDate birthdate;
}

CustomerConverter — We implement XStream's Converter interface. Converter implementations are responsible for marshalling Java objects to and from textual data. If an exception occurs during processing, a ConversionException should be thrown. When working with the high-level com.thoughtworks.xstream.XStream facade, you can register new converters using the XStream.registerConverter() method.

package com.example.config;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import com.example.domain.Customer;
import com.thoughtworks.xstream.converters.Converter;
import com.thoughtworks.xstream.converters.MarshallingContext;
import com.thoughtworks.xstream.converters.UnmarshallingContext;
import com.thoughtworks.xstream.io.HierarchicalStreamReader;
import com.thoughtworks.xstream.io.HierarchicalStreamWriter;
/**
 * XStream {@link Converter} that builds a {@link Customer} from a {@code <customer>} XML element.
 *
 * <p>The child elements are read by position, so they must appear in the order
 * {@code id}, {@code firstName}, {@code lastName}, {@code birthdate}. Only unmarshalling is
 * implemented; marshalling is a no-op.
 */
public class CustomerConverter implements Converter {

    /** Format of the {@code birthdate} element, for example {@code 10-10-1988 19:43:23}. */
    private static final DateTimeFormatter DT_FORMATTER = DateTimeFormatter.ofPattern("dd-MM-yyyy HH:mm:ss");

    /**
     * Tells XStream that this converter handles exactly the {@link Customer} class.
     *
     * @param type the class XStream wants to convert
     * @return {@code true} only if {@code type} is {@code Customer.class}
     */
    @Override
    @SuppressWarnings("rawtypes") // the overridden signature is kept exactly as in the original
    public boolean canConvert(Class type) {
        // Comparing from the constant side cannot throw if type is null.
        return Customer.class.equals(type);
    }

    /** Does nothing: this converter is only used for reading XML. */
    @Override
    public void marshal(Object source, HierarchicalStreamWriter writer, MarshallingContext context) {
        // Don't do anything
    }

    /**
     * Reads the four child elements of the current node, in order, into a new {@link Customer}.
     *
     * <p>Every descent into a child is paired with a matching ascent, so the reader is
     * positioned on the customer node again when this method returns.
     *
     * @param reader  reader positioned on the customer element
     * @param context unmarshalling context (unused)
     * @return the populated customer
     * @throws NumberFormatException if the id is not a valid long
     * @throws java.time.format.DateTimeParseException if the birthdate does not match the expected format
     */
    @Override
    public Object unmarshal(HierarchicalStreamReader reader, UnmarshallingContext context) {
        Customer customer = new Customer();
        customer.setId(Long.valueOf(readNextChild(reader)));
        customer.setFirstName(readNextChild(reader));
        customer.setLastName(readNextChild(reader));
        // The time part of the text is parsed but dropped because the field is a LocalDate.
        customer.setBirthdate(LocalDate.parse(readNextChild(reader), DT_FORMATTER));
        return customer;
    }

    /** Steps into the next child element, reads its text and steps back out to the parent. */
    private static String readNextChild(HierarchicalStreamReader reader) {
        reader.moveDown();
        String value = reader.getValue();
        reader.moveUp();
        return value;
    }
}

JobConfiguration — This is main class responsible for performing the batch job. In this class we used various Beans to perform the individual task.

StaxEventItemReader — Item reader for reading XML input based on StAX. It extracts fragments from the input XML document which correspond to records for processing. The fragments are wrapped with StartDocument and EndDocument events so that the fragments can be further processed like standalone XML documents. The implementation is not thread-safe.

CustomerWriter- This is custom class which writes data to MongoDB.

step1 — This step configures the ItemReader and the ItemWriter. An ItemProcessor is optional, so we have skipped it here.

Job — Batch domain object representing a job. Job is an explicit abstraction representing the configuration of a job specified by a developer. It should be noted that restart policy is applied to the job as a whole and not to a step.

package com.example.config;
import java.util.HashMap;
import java.util.Map;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.item.xml.StaxEventItemReader;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.ClassPathResource;
import org.springframework.oxm.xstream.XStreamMarshaller;
import com.example.domain.Customer;
import com.example.writer.CustomerWriter;
/**
 * Spring Batch configuration of the XML-to-MongoDB import job.
 *
 * <p>Defines a single-step job: {@code step1} reads {@link Customer} items from
 * {@code /data/customer.xml} on the classpath and hands them to the {@link CustomerWriter}
 * in chunks of 200. No item processor is configured.
 */
@Configuration
public class JobConfiguration {

    @Autowired
    private JobBuilderFactory jobBuilderFactory;

    @Autowired
    private StepBuilderFactory stepBuilderFactory;

    /**
     * Creates the StAX based reader that turns every {@code <customer>} fragment of the input
     * file into a {@link Customer}.
     *
     * @return the configured reader
     */
    @Bean
    public StaxEventItemReader<Customer> customerItemReader() {
        // Map the XML element name to the domain class (typed with a wildcard, not a raw Class).
        Map<String, Class<?>> aliases = new HashMap<>();
        aliases.put("customer", Customer.class);

        XStreamMarshaller unmarshaller = new XStreamMarshaller();
        unmarshaller.setAliases(aliases);
        unmarshaller.setConverters(new CustomerConverter());

        StaxEventItemReader<Customer> reader = new StaxEventItemReader<>();
        reader.setResource(new ClassPathResource("/data/customer.xml"));
        reader.setFragmentRootElementName("customer");
        reader.setUnmarshaller(unmarshaller);

        return reader;
    }

    /**
     * Creates the writer that stores the customers in MongoDB.
     *
     * @return a new {@link CustomerWriter}
     */
    @Bean
    public CustomerWriter customerWriter() {
        return new CustomerWriter();
    }

    /**
     * Builds the chunk-oriented step that connects the reader and the writer.
     *
     * @return the step named {@code step1}
     */
    @Bean
    public Step step1() {
        return stepBuilderFactory.get("step1")
                .<Customer, Customer>chunk(200)
                .reader(customerItemReader())
                .writer(customerWriter())
                .build();
    }

    /**
     * Builds the job, which consists of {@link #step1()} only.
     *
     * @return the job named {@code job}
     */
    @Bean
    public Job job() {
        return jobBuilderFactory.get("job")
                .start(step1())
                .build();
    }
}

application.properties

# MongoDB connection settings: host and port of the server the application connects to.
# NOTE(review): no database name is configured here; confirm which database the driver falls back to.
spring.data.mongodb.host=localhost
spring.data.mongodb.port=27017

customer.xml — This is the sample input data. The reader loads it from the classpath location /data/customer.xml.

<?xml version="1.0" encoding="UTF-8" ?>
<!--
Sample input for the batch job: twelve customer records.
Each customer lists its children in the order id, firstName, lastName, birthdate;
CustomerConverter reads them by position, so this order must be kept.
birthdate uses the pattern dd-MM-yyyy HH:mm:ss.
-->
<customers>
<customer>
<id>1</id>
<firstName>John</firstName>
<lastName>Doe</lastName>
<birthdate>10-10-1988 19:43:23</birthdate>
</customer>
<customer>
<id>2</id>
<firstName>James</firstName>
<lastName>Moss</lastName>
<birthdate>01-04-1991 10:20:23</birthdate>
</customer>
<customer>
<id>3</id>
<firstName>Jonie</firstName>
<lastName>Gamble</lastName>
<birthdate>21-07-1982 11:12:13</birthdate>
</customer>
<customer>
<id>4</id>
<firstName>Mary</firstName>
<lastName>Kline</lastName>
<birthdate>07-08-1973 11:27:42</birthdate>
</customer>
<customer>
<id>5</id>
<firstName>William</firstName>
<lastName>Lockhart</lastName>
<birthdate>04-04-1994 04:15:11</birthdate>
</customer>
<customer>
<id>6</id>
<firstName>John</firstName>
<lastName>Doe</lastName>
<birthdate>10-10-1988 19:43:23</birthdate>
</customer>
<customer>
<id>7</id>
<firstName>Kristi</firstName>
<lastName>Dukes</lastName>
<birthdate>17-09-1983 21:22:23</birthdate>
</customer>
<customer>
<id>8</id>
<firstName>Angel</firstName>
<lastName>Porter</lastName>
<birthdate>15-12-1980 18:09:09</birthdate>
</customer>
<customer>
<id>9</id>
<firstName>Mary</firstName>
<lastName>Johnston</lastName>
<birthdate>07-07-1987 19:43:03</birthdate>
</customer>
<customer>
<id>10</id>
<firstName>Linda</firstName>
<lastName>Rodriguez</lastName>
<birthdate>16-09-1991 09:13:43</birthdate>
</customer>
<customer>
<id>11</id>
<firstName>Phillip</firstName>
<lastName>Lopez</lastName>
<birthdate>18-12-1965 11:10:09</birthdate>
</customer>
<customer>
<id>12</id>
<firstName>Peter</firstName>
<lastName>Dixon</lastName>
<birthdate>09-05-1996 19:09:23</birthdate>
</customer>
</customers>

MainApp — SpringBatchMongodbApplication is the entry point and can be run as a Spring Boot application.

package com.example;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
import org.springframework.data.mongodb.repository.config.EnableMongoRepositories;
/**
 * Entry point of the demo application.
 *
 * <p>Enables Spring Batch processing and the Mongo repositories found under
 * {@code com.example.repository}; {@link DataSourceAutoConfiguration} is excluded from
 * auto-configuration.
 */
@SpringBootApplication(exclude = {DataSourceAutoConfiguration.class})
@EnableBatchProcessing
@EnableMongoRepositories(basePackages = "com.example.repository")
public class SpringBatchMongodbApplication {

    /**
     * Starts the Spring Boot application.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(SpringBatchMongodbApplication.class, args);
    }
}

Output:

db.getCollection('customer').find({})
/* 1 */
{
"_id" : NumberLong(1),
"firstName" : "John",
"lastName" : "Doe",
"birthdate" : ISODate("1988-10-09T18:30:00.000Z"),
"_class" : "com.example.domain.Customer"
}
/* 2 */
{
"_id" : NumberLong(2),
"firstName" : "James",
"lastName" : "Moss",
"birthdate" : ISODate("1991-03-31T18:30:00.000Z"),
"_class" : "com.example.domain.Customer"
}
/* 3 */
{
"_id" : NumberLong(3),
"firstName" : "Jonie",
"lastName" : "Gamble",
"birthdate" : ISODate("1982-07-20T18:30:00.000Z"),
"_class" : "com.example.domain.Customer"
}
/* 4 */
{
"_id" : NumberLong(4),
"firstName" : "Mary",
"lastName" : "Kline",
"birthdate" : ISODate("1973-08-06T18:30:00.000Z"),
"_class" : "com.example.domain.Customer"
}
/* 5 */
{
"_id" : NumberLong(5),
"firstName" : "William",
"lastName" : "Lockhart",
"birthdate" : ISODate("1994-04-03T18:30:00.000Z"),
"_class" : "com.example.domain.Customer"
}
/* 6 */
{
"_id" : NumberLong(6),
"firstName" : "John",
"lastName" : "Doe",
"birthdate" : ISODate("1988-10-09T18:30:00.000Z"),
"_class" : "com.example.domain.Customer"
}
/* 7 */
{
"_id" : NumberLong(7),
"firstName" : "Kristi",
"lastName" : "Dukes",
"birthdate" : ISODate("1983-09-16T18:30:00.000Z"),
"_class" : "com.example.domain.Customer"
}
/* 8 */
{
"_id" : NumberLong(8),
"firstName" : "Angel",
"lastName" : "Porter",
"birthdate" : ISODate("1980-12-14T18:30:00.000Z"),
"_class" : "com.example.domain.Customer"
}
/* 9 */
{
"_id" : NumberLong(9),
"firstName" : "Mary",
"lastName" : "Johnston",
"birthdate" : ISODate("1987-07-06T18:30:00.000Z"),
"_class" : "com.example.domain.Customer"
}
/* 10 */
{
"_id" : NumberLong(10),
"firstName" : "Linda",
"lastName" : "Rodriguez",
"birthdate" : ISODate("1991-09-15T18:30:00.000Z"),
"_class" : "com.example.domain.Customer"
}
/* 11 */
{
"_id" : NumberLong(11),
"firstName" : "Phillip",
"lastName" : "Lopez",
"birthdate" : ISODate("1965-12-17T18:30:00.000Z"),
"_class" : "com.example.domain.Customer"
}
/* 12 */
{
"_id" : NumberLong(12),
"firstName" : "Peter",
"lastName" : "Dixon",
"birthdate" : ISODate("1996-05-08T18:30:00.000Z"),
"_class" : "com.example.domain.Customer"
}

--

--

Prateek
Prateek

Written by Prateek

Java Developer and enthusiast

No responses yet