[英]Java - Get all files in directories and subdirectories recursively
這裡有個特殊之處:我處理的不是本地目錄。我正在嘗試為以下鏈接創建一個批量下載器:
https://files.secureserver.net/0fHCh0CLd6Az63
https://files.secureserver.net/0fdAWETp4sONW5
在這里,每個文件和文件夾都分配有唯一的ID。
我的目標是從所有目錄和子目錄中獲取所有文件ID。
如果我有ID,就可以下載文件並查看文件夾的內容。
問題在於,當我獲取某個目錄的內容時,其中還會包含子目錄。
那么,如何遞歸獲取這些目錄和子目錄中的所有文件ID?
可以用樹狀數據結構來解決嗎?或者有什麼更簡單的方法?
這是我的代碼:
package javaapplication1;
import java.io.IOException;
import java.util.Iterator;
import java.util.TreeSet;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Collects file and folder ids from a shared-folder page and resolves download
 * links for individual files.
 *
 * <p>Typical use: call {@link #getUrl(String)} with the share link, which fills
 * {@link #folderTreeSet} and {@link #fileTreeSet} with the ids found in the root
 * listing. {@link #getFolderById(String)} adds the ids found in one more folder,
 * and {@link #crawlAllFolders()} repeats that for every folder id discovered.
 *
 * <p>Instances hold mutable state (the last parsed document and the id sets) and
 * are not written for concurrent use.
 */
public class GoDaddyDownloader2 {

    /** User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0";

    /** Most recently fetched page or listing; scanned by {@link #getFileAndFilders()}. */
    Document document;
    /** URL that folder listings are POSTed to; derived from {@link #frameSrc}. */
    String openFolderUrl;
    /** URL that download-link requests are POSTed to; derived from {@link #frameSrc}. */
    String downloadFileUrl;
    /** The {@code src} attribute of the iframe found on the share page. */
    String frameSrc;
    /** Every folder id seen so far (sorted, no duplicates). */
    TreeSet<String> folderTreeSet;
    /** Every file id seen so far (sorted, no duplicates). */
    TreeSet<String> fileTreeSet;
    /** Last file id that had to be extracted from a quoted attribute value. */
    StringBuilder fileId;
    /** Last folder id that had to be extracted from a quoted attribute value. */
    StringBuilder folderId;

    /** Creates a downloader with empty URLs and empty id sets. */
    public GoDaddyDownloader2() {
        openFolderUrl = "";
        downloadFileUrl = "";
        frameSrc = "";
        folderTreeSet = new TreeSet<>();
        fileTreeSet = new TreeSet<>();
        fileId = new StringBuilder();
        folderId = new StringBuilder();
    }

    /**
     * Loads the share page, derives the listing and download endpoints from its
     * iframe {@code src}, prints the three URLs and then scans the root folder.
     *
     * @param url the public share link
     * @throws IOException if a page cannot be fetched
     */
    public void getUrl(String url) throws IOException {
        document = Jsoup.connect(url)
                .userAgent(USER_AGENT)
                .get();
        frameSrc = document.getElementsByTag("iframe").attr("src");

        // Both endpoints are obtained purely by rewriting path segments of the iframe URL.
        openFolderUrl = frameSrc
                .replace("display_folder", "get_listing")
                .replace("public_folder", "public_folder_ajax");
        downloadFileUrl = frameSrc.replace("display_folder", "get_download_url");

        System.out.println(frameSrc);
        System.out.println(openFolderUrl);
        System.out.println(downloadFileUrl);
        getRootFolder();
    }

    /**
     * Fetches the iframe page referenced by {@link #frameSrc} and records the ids
     * it contains.
     *
     * @throws IOException if the page cannot be fetched
     */
    public void getRootFolder() throws IOException {
        document = Jsoup.connect(frameSrc)
                .userAgent(USER_AGENT)
                .get();
        getFileAndFilders();
    }

    /**
     * Scans every {@code <map>} element of the current {@link #document} and adds
     * its {@code folder_id} or, failing that, its {@code file_id} to the matching set.
     *
     * <p>Values containing a double quote are reduced to the text between the quotes
     * first. Quoted folder ids are now recorded as well; previously they were
     * extracted but dropped, so folders that only appear in that form were never
     * remembered.
     */
    public void getFileAndFilders() {
        for (Element map : document.getElementsByTag("map")) {
            String rawFolderId = map.attr("folder_id");
            String rawFileId = map.attr("file_id");

            if (!rawFolderId.isEmpty()) {
                if (rawFolderId.contains("\"")) {
                    folderId = new StringBuilder(extractQuotedId(rawFolderId));
                    if (folderId.length() > 0) {
                        folderTreeSet.add(folderId.toString());
                    }
                } else {
                    folderTreeSet.add(rawFolderId);
                }
            } else if (!rawFileId.isEmpty()) {
                if (rawFileId.contains("\"")) {
                    fileId = new StringBuilder(extractQuotedId(rawFileId));
                    if (fileId.length() > 0) {
                        fileTreeSet.add(fileId.toString());
                    }
                } else {
                    fileTreeSet.add(rawFileId);
                }
            }
        }
    }

    /**
     * Returns the text after the first double quote up to, but excluding, the
     * character immediately before the last double quote.
     *
     * <p>NOTE(review): dropping that extra character presumably removes an escape
     * backslash in front of the closing quote (e.g. {@code \"123\"}) — confirm
     * against a real listing response.
     *
     * @param raw attribute value containing at least one double quote
     * @return the extracted id, or an empty string when the quotes enclose nothing
     *         usable (instead of failing with an index error)
     */
    private static String extractQuotedId(String raw) {
        int start = raw.indexOf('"') + 1;
        int end = raw.lastIndexOf('"') - 1;
        return end > start ? raw.substring(start, end) : "";
    }

    /**
     * POSTs a listing request for one folder and records the ids in the response.
     *
     * @param fid id of the folder to list
     * @throws IOException if the request fails
     */
    public void getFolderById(String fid) throws IOException {
        document = Jsoup.connect(openFolderUrl)
                .userAgent(USER_AGENT)
                .data("folder_id", fid)
                .data("open_folder_id", "")
                .data("view", "list")
                .data("column_number", "0")
                .data("sort_term", "name")
                .data("sort_direction", "asc")
                .data("offset", "0")
                .method(Connection.Method.POST)
                .execute().parse();
        getFileAndFilders();
    }

    /**
     * Lists every folder id currently known and every folder id discovered while
     * doing so, until no unvisited folder remains. Afterwards {@link #fileTreeSet}
     * and {@link #folderTreeSet} hold everything the listings exposed.
     *
     * <p>Each folder id is requested at most once, so a listing that mentions an
     * already-visited folder cannot cause an endless loop.
     *
     * @throws IOException if any listing request fails
     */
    public void crawlAllFolders() throws IOException {
        TreeSet<String> visited = new TreeSet<>();
        TreeSet<String> pending = new TreeSet<>(folderTreeSet);
        while (!pending.isEmpty()) {
            String next = pending.pollFirst();
            if (!visited.add(next)) {
                continue;
            }
            getFolderById(next);
            // getFolderById may have added new ids to folderTreeSet; queue the unseen ones.
            for (String known : folderTreeSet) {
                if (!visited.contains(known)) {
                    pending.add(known);
                }
            }
        }
    }

    /**
     * POSTs a download-link request for one file, prints the response text and
     * returns it.
     *
     * @param fileId id of the file
     * @return the text of the response body
     * @throws IOException if the request fails
     */
    public String downloadFileById(String fileId) throws IOException {
        String link = Jsoup.connect(downloadFileUrl)
                .userAgent(USER_AGENT)
                .data("file_id", fileId)
                .method(Connection.Method.POST)
                .execute().parse().text();
        System.out.println(link);
        return link;
    }

    /** Prints all known folder ids followed by all known file ids. */
    private void printContents() {
        System.out.println("Folders");
        for (String id : folderTreeSet) {
            System.out.println(id);
        }
        System.out.println("---------------");
        System.out.println("Files");
        for (String id : fileTreeSet) {
            System.out.println(id);
        }
    }

    /**
     * Demo: lists the root folder, then the first sub-folder, then requests the
     * download link of the first file.
     *
     * @param args unused
     * @throws IOException if any request fails
     */
    public static void main(String[] args) throws IOException {
        GoDaddyDownloader2 obj = new GoDaddyDownloader2();
        obj.getUrl("https://files.secureserver.net/0fHCh0CLd6Az63");

        // Contents of root directory
        obj.printContents();
        System.out.println("===============");

        // Adding contents of first directory to the sets
        System.out.println("After adding contents of first directory");
        if (!obj.folderTreeSet.isEmpty()) {
            obj.getFolderById(obj.folderTreeSet.first());
        }
        obj.printContents();

        System.out.println("Generate file link");
        // first() throws on an empty set, so only ask for a link when a file is known.
        if (!obj.fileTreeSet.isEmpty()) {
            obj.downloadFileById(obj.fileTreeSet.first());
        }
    }
}
我使用 TreeSet 來避免重複。
答案就出在問題上:由於您要遞歸執行某項操作,因此顯而易見的方法是使用遞歸。 類似於以下偽代碼:
/**
 * Collects the result of {@code downloadThingFromFile} for every file found in
 * the given directory and, recursively, in all of its sub-directories.
 *
 * @param directory the directory to start from
 * @return the set of everything that was downloaded
 */
public Set<Thing> downloadEverything(Directory directory) {
    Set<Thing> result = new HashSet<>();
    downloadEverything(directory, result);
    // The method is declared to return the set, so hand the accumulated result back.
    return result;
}
private void downloadEverything(Directory directory, Set<Thing> result) {
for (File file : getFilesOfDirectory() {
result.add(downloadThingFromFile(file));
}
for (Directory subDirectory : getSubdirectoriesOfDirectory(directory) {
downloadEverything(subDirectory, result);
}
}
/**
 * Placeholder for the per-file download step; the body is intentionally left
 * out of this pseudo-code and must be supplied (and must return a {@code Thing})
 * before the sketch can compile.
 *
 * @param file the file to download
 * @return the downloaded item
 */
private Thing downloadThingFromFile(File file) {
// ...
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.