Avro Example 4 — Use Array

Prateek
3 min read · Jun 22, 2023

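Let's look at how to use an array in an Avro schema. The example below defines three schemas, generates Java classes from them with the Avro Maven plugin, and then writes and reads a record that contains an array field.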

address.avsc

{
  "type": "record",
  "namespace": "com.example.generated.model",
  "name": "Address",
  "fields": [
    { "name": "addressLine1", "type": "string" },
    { "name": "city", "type": "string" },
    { "name": "state_province", "type": "string" },
    { "name": "country", "type": "string", "default": "USA" },
    { "name": "zip", "type": "string" }
  ]
}

OrderLineItem.avsc

{
  "type": "record",
  "namespace": "com.example.generated.model",
  "name": "OrderLineItem",
  "fields": [
    { "name": "name", "type": "string" },
    {
      "name": "size",
      "type": { "type": "enum", "name": "Size", "symbols": ["SMALL", "MEDIUM", "LARGE"] }
    },
    { "name": "quantity", "type": "int" }
  ]
}

customer.avsc

{
  "type": "record",
  "namespace": "com.example.generated.model",
  "name": "Customer",
  "fields": [
    { "name": "first_name", "type": "string", "doc": "First Name of Customer" },
    { "name": "last_name", "type": "string", "doc": "Last Name of Customer" },
    { "name": "age", "type": "int", "doc": "Age at the time of registration" },
    { "name": "height", "type": "float", "doc": "Height at the time of registration in cm" },
    { "name": "weight", "type": "float", "doc": "Weight at the time of registration in kg" },
    { "name": "automated_email", "type": "boolean", "default": true, "doc": "Field indicating if the user is enrolled in marketing emails" },
    { "name": "address", "type": "Address" },
    { "name": "Languages", "type": { "type": "enum", "name": "Languages", "symbols": ["English", "Hindi", "Marathi", "Spanish"] } },
    {
      "name": "orderLineItems",
      "type": {
        "type": "array",
        "items": "OrderLineItem"
      }
    }
  ]
}

pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.example</groupId>
  <artifactId>avro-demo-1</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>avro-demo-1</name>
  <url>http://maven.apache.org</url>

  <properties>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <!-- Single source of truth for the Avro version: the code generator (avro-maven-plugin)
         and the runtime library (avro) are kept on the same release. -->
    <avro.version>1.10.1</avro.version>
  </properties>

  <repositories>
    <repository>
      <id>confluent</id>
      <name>Confluent</name>
      <url>https://packages.confluent.io/maven/</url>
    </repository>
  </repositories>

  <dependencies>
    <!-- Avro runtime used by the generated classes and by the data file reader/writer -->
    <dependency>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro</artifactId>
      <version>${avro.version}</version>
    </dependency>
    <!-- Generates the fake names and addresses used to populate the example record -->
    <dependency>
      <groupId>com.github.javafaker</groupId>
      <artifactId>javafaker</artifactId>
      <version>1.0.2</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Maven Compiler Plugin -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.8.0</version>
      </plugin>
      <!-- Generates Java classes from the .avsc schemas during generate-sources -->
      <plugin>
        <groupId>org.apache.avro</groupId>
        <artifactId>avro-maven-plugin</artifactId>
        <version>${avro.version}</version>
        <executions>
          <execution>
            <phase>generate-sources</phase>
            <goals>
              <goal>schema</goal>
            </goals>
            <configuration>
              <sourceDirectory>${project.basedir}/src/main/avro/</sourceDirectory>
              <outputDirectory>${project.basedir}/src/main/java/</outputDirectory>
              <!-- Imports are compiled first, in the listed order, so that customer.avsc
                   can refer to Address and OrderLineItem by name. -->
              <imports>
                <import>${project.basedir}/src/main/avro/address.avsc</import>
                <import>${project.basedir}/src/main/avro/OrderLineItem.avsc</import>
                <import>${project.basedir}/src/main/avro/customer.avsc</import>
              </imports>
              <enableDecimalLogicalType>true</enableDecimalLogicalType>
              <customConversions>org.apache.avro.Conversions$UUIDConversion</customConversions>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>

SpecificRecordExamples.java

package com.example.specific;

import com.example.generated.model.Address;
import com.example.generated.model.Customer;
import com.example.generated.model.Languages;
import com.example.generated.model.OrderLineItem;
import com.example.generated.model.Size;
import com.github.javafaker.Faker;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.concurrent.ThreadLocalRandom;

/**
 * Demonstrates Avro specific records with an array field.
 *
 * <p>Builds a {@link Customer} that carries an {@link Address} and a list of
 * {@link OrderLineItem}s, writes it to an Avro data file, then reads the file back and
 * prints every record it contains.
 */
public class SpecificRecordExamples {
    private static final Faker FAKER = Faker.instance();

    /** Name of the Avro data file that is written and then read back (relative to the working directory). */
    private static final String CUSTOMER_FILE_NAME = "customer-specific.avro";

    /**
     * Runs the write-then-read round trip.
     *
     * <p>An {@link IOException} from either phase is reported via its stack trace; the read
     * phase is skipped when the write phase failed, since there is nothing valid to read.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final Customer customer = buildCustomer();
        final File file = new File(CUSTOMER_FILE_NAME);

        try {
            writeCustomer(customer, file);
            System.out.println("successfully wrote " + CUSTOMER_FILE_NAME);

            System.out.println("\nReading our specific record");
            printCustomers(file);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Builds the sample customer, including its address and a one-element order line item list. */
    private static Customer buildCustomer() {
        OrderLineItem orderLineItem = OrderLineItem.newBuilder()
                .setName("Mac Book Pro")
                .setQuantity(10)
                .setSize(Size.MEDIUM)
                .build();

        // "country" is not set here: the schema declares a default ("USA") for it.
        Address address = Address.newBuilder()
                .setAddressLine1(FAKER.address().fullAddress())
                .setCity(FAKER.address().city())
                .setStateProvince(FAKER.address().state())
                .setZip(FAKER.address().zipCode())
                .build();

        return Customer.newBuilder()
                .setAge(ThreadLocalRandom.current().nextInt(10, 50))
                .setFirstName(FAKER.name().firstName())
                .setLastName(FAKER.name().lastName())
                .setAutomatedEmail(true)
                .setHeight(180f)
                .setWeight(70f)
                .setAddress(address)
                .setLanguages(Languages.English)
                // The Avro "array" field is populated from a java.util.List.
                .setOrderLineItems(Arrays.asList(orderLineItem))
                .build();
    }

    /**
     * Writes a single customer to {@code file} as an Avro data file.
     *
     * @param customer record to write; its schema is used as the file schema
     * @param file     destination file
     * @throws IOException if the file cannot be created or written
     */
    private static void writeCustomer(Customer customer, File file) throws IOException {
        final DatumWriter<Customer> datumWriter = new SpecificDatumWriter<>(Customer.class);
        try (DataFileWriter<Customer> dataFileWriter = new DataFileWriter<>(datumWriter)) {
            dataFileWriter.create(customer.getSchema(), file);
            dataFileWriter.append(customer);
        }
    }

    /**
     * Reads every customer stored in {@code file} and prints the full record followed by its
     * first name.
     *
     * @param file Avro data file to read
     * @throws IOException if the file cannot be opened or read
     */
    private static void printCustomers(File file) throws IOException {
        final DatumReader<Customer> datumReader = new SpecificDatumReader<>(Customer.class);
        // try-with-resources closes the reader (and the underlying file) even if reading fails.
        try (DataFileReader<Customer> dataFileReader = new DataFileReader<>(file, datumReader)) {
            while (dataFileReader.hasNext()) {
                Customer readCustomer = dataFileReader.next();
                System.out.println(readCustomer.toString());
                System.out.println("First name: " + readCustomer.getFirstName());
            }
        }
    }
}

successfully wrote customer-specific.avro

Reading our specific record
{"first_name": "Donnette", "last_name": "Kuhic", "age": 11, "height": 180.0, "weight": 70.0, "automated_email": true, "address": {"addressLine1": "Apt. 079 142 Kanesha Walks, Rosalindatown, WV 93228", "city": "Purdyshire", "state_province": "Ohio", "country": "USA", "zip": "77871"}, "Languages": "English", "orderLineItems": [{"name": "Mac Book Pro", "size": "MEDIUM", "quantity": 10}]}
First name: Donnette

--

--