[英]CourseNotFound and CatalogNotFound for web-scraping application
我需要帮助抛出和处理两个异常,CourseNotFoundException 和 CatalogNotFoundException 用于学校的网络抓取项目。
当抓取成功但没有找到给定课程请求的数据时,它应该抛出 CourseNotFoundException。
并且,在抓取成功但未找到给定目录请求的数据时抛出 CatalogNotFoundException。
我指出了我认为应该将它们扔到哪里,但是当课程/目录不存在时,我无法弄清楚如何让它们扔掉。
这是代码。
package edu.tntech.csc2310;
import com.google.gson.stream.JsonReader;
import demo.BeanDemo;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.google.gson.Gson;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Scanner;
public class CourseCatalog {
private ArrayList<Course> db;
private String catalogYear;
private String subject;
public String getCatalogYear() {
return catalogYear;
}
public String getSubject() {
return subject;
}
public ArrayList<Course> getCourses(){
return db;
}
public CourseCatalog(String subject, String catalogYear) throws CatalogNotFoundException {
String subj = subject.trim().toUpperCase();
Integer trm = Integer.parseInt(catalogYear.trim());
// TODO: Add exception handling. This method should specify that it throws an exception
// Address the problem of subject and/or catalog year not existing
this.catalogYear = trm.toString();
this.subject = subj.toUpperCase();
this.db = new ArrayList();
Gson gson = new Gson();
try {
String filename = subject+"_"+catalogYear+".json";
File file = new File("src/main/resources/" + filename);
if (file.createNewFile()) {
ArrayList<String> list = CourseCatalog.getCourseNumbers(this.subject, this.catalogYear);
if (list.size() > 0) {
for (String s : list) {
Course c = new Course(this.subject, s, this.catalogYear);
this.db.add(c);
}
} else {
this.subject = null;
this.catalogYear = null;
this.db = null;
}
if(db.size() == 0){
throw new CatalogNotFoundException("Catalog not found");
}
String jsonString = gson.toJson(db);
FileWriter out = new FileWriter(file);
out.write(jsonString);
out.close();
} else {
FileReader in = new FileReader(file);
JsonReader jr = gson.newJsonReader(in);
Course[] instance = gson.fromJson(jr, Course[].class);
for(int i=0; i < instance.length ; i++){
db.add(instance[i]);
}
}
} catch (IOException ex){
}
}
public Course getCourse(String number){
Course result = null;
for (Course c: db){
if (c.getNumber().equalsIgnoreCase(number)){
result = c;
break;
}
}
return result;
}
public String toString(){
return this.db.toString();
}
@SuppressWarnings("SpellCheckingInspection")
public static ArrayList<String> getCourseNumbers(String subject, String catalogYear){
Document doc = null;
ArrayList<String> list = new ArrayList();
try {
doc = Jsoup.connect("https://ttuss1.tntech.edu/PROD/bwckctlg.p_display_courses?sel_crse_strt=1000&sel_crse_end=4999&sel_subj=&sel_levl=&sel_schd=&sel_coll=&sel_divs=&sel_dept=&sel_attr="+"&term_in="+catalogYear+"&one_subj="+subject).get();
Elements courseTitles = doc.select(".nttitle");
for (Element title : courseTitles) {
String line = title.text();
Scanner scan = new Scanner(line);
scan.useDelimiter(" ");
scan.next();
String crseNum = scan.next();
list.add(crseNum);
}
} catch (IOException e) {
e.printStackTrace();
}
return list;
}
private static void log(String msg, String... vals) {
System.out.println(String.format(msg, vals));
}
}
package edu.tntech.csc2310;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import javax.security.auth.Subject;
import java.io.IOException;
import java.util.Scanner;
@SuppressWarnings("SpellCheckingInspection")
public class Course {
private static final String url = "https://ttuss1.tntech.edu/PROD/bwckctlg.p_disp_course_detail?";
private String subject;
private String number;
private String title;
private String description;
private int credits;
private String[] prerequisites;
public Course(String subject, String number, String term) throws CourseNotFoundException{
/**
* TODO: Add exception support here. This method should specify that it throws an exception
* Modify this method so that it throws a CourseNotFoundException if the course does
* not exist.
*/
String subj = subject.trim().toUpperCase();
String numb = number.trim();
Integer trm = Integer.parseInt(term.trim());
Integer numbTest = Integer.parseInt(numb);
String searchUrl = url + "&cat_term_in=" + trm.toString() + "&subj_code_in=" + subj + "&crse_numb_in=" + numbTest.toString();
try {
Document doc = Jsoup.connect(searchUrl).get();
Elements elements = doc.select(".nttitle");
if (elements.size() > 0) {
String temp = (String) elements.get(0).text();
int index = temp.indexOf('-');
this.title = temp.substring(index + 2);
Elements courseDescription = doc.select(".ntdefault");
this.description = courseDescription.get(0).text();
this.subject = subj;
this.number = numb;
this.credits = (int) this.parseCRH();
} else {
this.subject = null;
this.description = null;
this.number = null;
this.credits = -1;
}
} catch (IOException e) {
e.printStackTrace();
}
}
private double parseCRH(){
int index = this.description.indexOf("Credit hours");
String tmp = this.description.substring(0, index-1);
int first = tmp.lastIndexOf(" ");
int idx = Math.max(first, 0);
tmp = tmp.substring(idx).trim();
return Double.parseDouble(tmp);
}
public String getSubject() {
return subject;
}
public String getNumber() {
return number;
}
public String getTitle() {
return title;
}
public String getDescription() {
return description;
}
/**
* Creates a flattened list of pre-requisites; removes C or D or better information,
* as well as disjunctive normal form. All structure should be removed from the pre-requisite list
* @return
*/
public String[] getPrerequisites() {
String[] repls = {
"Course or Test: ",
"Minimum Grade of C ",
"Minimum Grade of D ",
"May not be taken concurrently.",
"May be taken concurrently.",
"(",
")"
};
String[] list = null;
if (this.description != null) {
int sindex = this.description.lastIndexOf("Requirements:");
if (sindex > 0) {
String subStr = this.description.substring(sindex + 13).trim();
for (int i = 0; i < repls.length; i++) {
subStr = subStr.replace(repls[i], "");
}
subStr = subStr.replace("or", ",");
subStr = subStr.replace("and", ",");
list = subStr.split(",");
}
}
return list;
}
public int getCredits() {
return credits;
}
public String toString(){
return subject + " " + number + " " + title + "\n" + description;
}
public String toString(boolean full){
if (full)
return this + this.description;
else
return this.toString();
}
private static void log(String msg, String... vals) {
System.out.println(String.format(msg, vals));
}
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.