jsoup Get HTML Elements by Attribute Value in Java
Tags: jsoup HTML Parser
In this post, we explore the different methods provided by the jsoup Java library for finding HTML elements by attribute value.
Add jsoup library to your Java project
To use the jsoup Java library in a Gradle project, add the following dependency to the build.gradle file.
implementation 'org.jsoup:jsoup:1.13.1'
To use the jsoup Java library in a Maven project, add the following dependency to the pom.xml file.
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
To download the jsoup-1.13.1.jar file, visit the jsoup download page at jsoup.org/download
Sample HTML File
For example, suppose we have the following sample.html file.
<!DOCTYPE html>
<html>
<body>
<span data-test="value1">Simple Solution</span>
<span data-test="value2">Java Tutorials</span>
<span data-test="value3">Spring Boot Tutorials</span>
</body>
</html>
Using Document.getElementsByAttributeValue() method
This method can be used to find HTML elements by a given attribute value and name pair.
Elements elements = document.getElementsByAttributeValue("attribute-name", "attribute-value");
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.IOException;
/**
 * Parses sample.html and prints the text of every element whose
 * data-test attribute has the value "value2".
 */
public class GetElementsByAttributeValueExample {

    public static void main(String... args) {
        Document page;
        try {
            // Parsing is the only step that can raise an IOException.
            page = Jsoup.parse(new File("sample.html"), "UTF-8");
        } catch (IOException ex) {
            ex.printStackTrace();
            return;
        }

        Elements matches = page.getElementsByAttributeValue("data-test", "value2");
        for (Element match : matches) {
            System.out.println(match.text());
        }
    }
}
Java Tutorials
Using Document.getElementsByAttributeValueNot() method
This method finds all HTML elements that either do not have the given attribute or have it with a different value.
Elements elements = document.getElementsByAttributeValueNot("attribute-name", "value");
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.IOException;
/**
 * Parses sample.html and prints the text and tag name of every element
 * returned by getElementsByAttributeValueNot("data-test", "value2").
 */
public class GetElementsByAttributeValueNotExample {

    public static void main(String... args) {
        try {
            Document page = Jsoup.parse(new File("sample.html"), "UTF-8");
            Elements found = page.getElementsByAttributeValueNot("data-test", "value2");

            // Walk the result list by index, reporting each element in order.
            for (int i = 0; i < found.size(); i++) {
                Element current = found.get(i);
                System.out.println("Text: " + current.text());
                System.out.println("Tag Name: " + current.tagName());
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}
Text: Simple Solution Java Tutorials Spring Boot Tutorials
Tag Name: #root
Text: Simple Solution Java Tutorials Spring Boot Tutorials
Tag Name: html
Text:
Tag Name: head
Text: Simple Solution Java Tutorials Spring Boot Tutorials
Tag Name: body
Text: Simple Solution
Tag Name: span
Text: Spring Boot Tutorials
Tag Name: span
Using Document.getElementsByAttributeValueStarting() method
The method Document.getElementsByAttributeValueStarting() can be used to find all elements in an HTML document whose attribute value starts with a given prefix String.
Elements elements = document.getElementsByAttributeValueStarting("attribute-name", "prefix");
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.IOException;
/**
 * Parses sample.html and prints the text and tag name of every element
 * returned by getElementsByAttributeValueStarting("data-test", "value").
 */
public class GetElementsByAttributeValueStartingExample {

    public static void main(String... args) {
        try {
            File source = new File("sample.html");
            Document page = Jsoup.parse(source, "UTF-8");
            Elements prefixed = page.getElementsByAttributeValueStarting("data-test", "value");
            for (Element item : prefixed) {
                describe(item);
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }

    /** Prints one element's text and tag name, followed by a blank line. */
    private static void describe(Element item) {
        System.out.println("Text: " + item.text());
        System.out.println("Tag Name: " + item.tagName());
        System.out.println();
    }
}
Text: Simple Solution
Tag Name: span
Text: Java Tutorials
Tag Name: span
Text: Spring Boot Tutorials
Tag Name: span
Using Document.getElementsByAttributeValueEnding() method
The method Document.getElementsByAttributeValueEnding() can be used to find all elements in an HTML document whose attribute value ends with a given suffix String.
Elements elements = document.getElementsByAttributeValueEnding("attribute-name", "suffix");
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.IOException;
/**
 * Parses sample.html and prints the text and tag name of every element
 * returned by getElementsByAttributeValueEnding("data-test", "2").
 */
public class GetElementsByAttributeValueEndingExample {

    public static void main(String... args) {
        try {
            Document page = loadSample();
            report(page.getElementsByAttributeValueEnding("data-test", "2"));
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }

    /** Parses sample.html from the working directory using the UTF-8 charset name. */
    private static Document loadSample() throws IOException {
        File source = new File("sample.html");
        return Jsoup.parse(source, "UTF-8");
    }

    /** Prints the text and tag name of each element, in list order. */
    private static void report(Elements hits) {
        for (Element hit : hits) {
            System.out.println("Text: " + hit.text());
            System.out.println("Tag Name: " + hit.tagName());
        }
    }
}
Text: Java Tutorials
Tag Name: span
Using Document.getElementsByAttributeValueContaining() method
This method finds all HTML elements whose attribute value contains a given String.
Elements elements = document.getElementsByAttributeValueContaining("attribute-name", "value");
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.IOException;
/**
 * Parses sample.html and prints the text and tag name of every element
 * returned by getElementsByAttributeValueContaining("data-test", "value").
 */
public class GetElementsByAttributeValueContainingExample {

    public static void main(String... args) {
        final String attributeName = "data-test";
        final String fragment = "value";

        try {
            Elements containing = Jsoup.parse(new File("sample.html"), "UTF-8")
                    .getElementsByAttributeValueContaining(attributeName, fragment);

            for (Element entry : containing) {
                String text = entry.text();
                String tag = entry.tagName();
                System.out.println("Text: " + text);
                System.out.println("Tag Name: " + tag);
                System.out.println();
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}
Text: Simple Solution
Tag Name: span
Text: Java Tutorials
Tag Name: span
Text: Spring Boot Tutorials
Tag Name: span
Using Document.getElementsByAttributeValueMatching() method
This method finds HTML elements whose attribute value matches a given regular expression. The regular expression can be supplied either as a String or as a compiled Pattern.
Elements elements = document.getElementsByAttributeValueMatching("attribute-name", "regex");
Elements elements = document.getElementsByAttributeValueMatching("attribute-name", pattern);
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.IOException;
/**
 * Parses sample.html and prints the text and tag name of every element
 * returned by getElementsByAttributeValueMatching("data-test", "^value"),
 * passing the regular expression as a String.
 */
public class GetElementsByAttributeValueMatchingExample1 {

    public static void main(String... args) {
        try {
            Document page = Jsoup.parse(new File("sample.html"), "UTF-8");
            Elements matched = page.getElementsByAttributeValueMatching("data-test", "^value");

            int total = matched.size();
            for (int index = 0; index < total; index++) {
                Element current = matched.get(index);
                System.out.println("Text: " + current.text());
                System.out.println("Tag Name: " + current.tagName());
                System.out.println();
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}
Text: Simple Solution
Tag Name: span
Text: Java Tutorials
Tag Name: span
Text: Spring Boot Tutorials
Tag Name: span
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.IOException;
import java.util.regex.Pattern;
/**
 * Parses sample.html and prints the text and tag name of every element
 * returned by getElementsByAttributeValueMatching("data-test", pattern),
 * passing the regular expression as a compiled Pattern.
 */
public class GetElementsByAttributeValueMatchingExample2 {

    /** Regular expression applied to the data-test attribute value. */
    private static final Pattern VALUE_PREFIX = Pattern.compile("^value");

    public static void main(String... args) {
        try {
            File source = new File("sample.html");
            Document page = Jsoup.parse(source, "UTF-8");
            for (Element hit : page.getElementsByAttributeValueMatching("data-test", VALUE_PREFIX)) {
                show(hit);
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }

    /** Prints one element's text and tag name, followed by a blank line. */
    private static void show(Element hit) {
        System.out.println("Text: " + hit.text());
        System.out.println("Tag Name: " + hit.tagName());
        System.out.println();
    }
}
Text: Simple Solution
Tag Name: span
Text: Java Tutorials
Tag Name: span
Text: Spring Boot Tutorials
Tag Name: span
Happy Coding 😊
Related Articles
jsoup Get HTML elements by CSS class name in Java
jsoup Get HTML Element by ID in Java
jsoup Get HTML Elements by Tag Name in Java
jsoup Get HTML Elements by Attribute Name in Java
Clean HTML String to get Safe HTML from Untrusted HTML in Java using jsoup