The open-source JTidy project does an excellent job of converting HTML files to the newer XHTML standard. You can find JTidy here
import org.w3c.tidy.Tidy;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.w3c.dom.Document;
...
/**
 * Converts an HTML file to XHTML using JTidy.
 *
 * <p>The input is parsed and cleaned by {@link Tidy}, and the resulting
 * markup is written to the output file. Both streams are always closed,
 * whether or not the conversion succeeds.
 *
 * <p>This method is best-effort: I/O failures (for example a missing input
 * file or an unwritable output path) are reported via
 * {@code printStackTrace()} and are not propagated to the caller.
 *
 * @param htmlFile  path of the HTML file to read
 * @param xhtmlFile path of the XHTML file to write; created or overwritten
 */
public static void htmlToXhtml(String htmlFile, String xhtmlFile){
    // try-with-resources closes both streams (output first, then input)
    // even when parsing throws, fixing the leak in the earlier version.
    try (FileInputStream in = new FileInputStream(htmlFile);
         FileOutputStream out = new FileOutputStream(xhtmlFile)) {
        Tidy tidy = new Tidy();
        // Without this flag JTidy pretty-prints plain HTML rather than XHTML.
        tidy.setXHTML(true);
        // parseDOM writes the tidied document to 'out' as a side effect;
        // the returned DOM tree is not needed here.
        tidy.parseDOM(in, out);
    } catch (java.io.IOException e) {
        // Covers FileNotFoundException (a subclass) plus failures on close().
        e.printStackTrace();
    }
}