繁体   English   中英

CourseNotFound 和 CatalogNotFound 用于网络抓取应用程序

[英]CourseNotFound and CatalogNotFound for web-scraping application

我需要帮助抛出和处理两个异常,CourseNotFoundException 和 CatalogNotFoundException 用于学校的网络抓取项目。

当抓取成功但没有找到给定课程请求的数据时,它应该抛出 CourseNotFoundException。

并且,在抓取成功但未找到给定目录请求的数据时抛出 CatalogNotFoundException。

我指出了我认为应该将它们扔到哪里,但是当课程/目录不存在时,我无法弄清楚如何让它们扔掉。

这是代码。

    package edu.tntech.csc2310;
import com.google.gson.stream.JsonReader;
import demo.BeanDemo;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.google.gson.Gson;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Scanner;

public class CourseCatalog {

    private ArrayList<Course> db;
    private String catalogYear;
    private String subject;

    public String getCatalogYear() {
        return catalogYear;
    }

    public String getSubject() {
        return subject;
    }

    public ArrayList<Course> getCourses(){
        return db;
    }

    public CourseCatalog(String subject, String catalogYear) throws CatalogNotFoundException {


        String subj = subject.trim().toUpperCase();
        Integer trm = Integer.parseInt(catalogYear.trim());

        // TODO: Add exception handling. This method should specify that it throws an exception
        // Address the problem of subject and/or catalog year not existing


        

        this.catalogYear = trm.toString();
        this.subject = subj.toUpperCase();
        this.db = new ArrayList();

        Gson gson = new Gson();
        try {
            String filename = subject+"_"+catalogYear+".json";
            File file = new File("src/main/resources/" + filename);

            if (file.createNewFile()) {
                ArrayList<String> list = CourseCatalog.getCourseNumbers(this.subject, this.catalogYear);
                if (list.size() > 0) {
                    for (String s : list) {
                        Course c = new Course(this.subject, s, this.catalogYear);
                        this.db.add(c);
                    }
                } else {
                    this.subject = null;
                    this.catalogYear = null;
                    this.db = null;

                }
                if(db.size() == 0){
                    throw new CatalogNotFoundException("Catalog not found");
                }
                String jsonString = gson.toJson(db);
                FileWriter out = new FileWriter(file);
                out.write(jsonString);
                out.close();
            } else {
                FileReader in = new FileReader(file);
                JsonReader jr = gson.newJsonReader(in);
                Course[] instance = gson.fromJson(jr, Course[].class);
                for(int i=0; i < instance.length ; i++){
                    db.add(instance[i]);

                }

            }
        } catch (IOException ex){

        }
        
    }



    public Course getCourse(String number){
        Course result = null;
        for (Course c: db){
            if (c.getNumber().equalsIgnoreCase(number)){
                result = c;
                break;
            }
        }
        return result;
    }

    public String toString(){
        return this.db.toString();
    }

    @SuppressWarnings("SpellCheckingInspection")
    public static ArrayList<String> getCourseNumbers(String subject, String catalogYear){

        Document doc = null;
        ArrayList<String> list = new ArrayList();

        try {
            doc = Jsoup.connect("https://ttuss1.tntech.edu/PROD/bwckctlg.p_display_courses?sel_crse_strt=1000&sel_crse_end=4999&sel_subj=&sel_levl=&sel_schd=&sel_coll=&sel_divs=&sel_dept=&sel_attr="+"&term_in="+catalogYear+"&one_subj="+subject).get();
            Elements courseTitles = doc.select(".nttitle");
            for (Element title : courseTitles) {
                String line = title.text();
                Scanner scan = new Scanner(line);
                scan.useDelimiter(" ");
                scan.next();
                String crseNum = scan.next();
                list.add(crseNum);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return list;
    }

    private static void log(String msg, String... vals) {
        System.out.println(String.format(msg, vals));
    }

}


    package edu.tntech.csc2310;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import javax.security.auth.Subject;
import java.io.IOException;
import java.util.Scanner;

@SuppressWarnings("SpellCheckingInspection")
public class Course {

    private static final String url = "https://ttuss1.tntech.edu/PROD/bwckctlg.p_disp_course_detail?";
    private String subject;
    private String number;
    private String title;
    private String description;
    private int credits;
    private String[] prerequisites;

    public Course(String subject, String number, String term) throws CourseNotFoundException{

        /**
         * TODO: Add exception support here. This method should specify that it throws an exception
         * Modify this method so that it throws a CourseNotFoundException if the course does
         * not exist.
         */


        String subj = subject.trim().toUpperCase();
        String numb = number.trim();
        Integer trm = Integer.parseInt(term.trim());
        Integer numbTest = Integer.parseInt(numb);

        String searchUrl = url + "&cat_term_in=" + trm.toString() + "&subj_code_in=" + subj + "&crse_numb_in=" + numbTest.toString();
        try {
            Document doc = Jsoup.connect(searchUrl).get();
            Elements elements = doc.select(".nttitle");
            if (elements.size() > 0) {
                String temp = (String) elements.get(0).text();
                int index = temp.indexOf('-');
                this.title = temp.substring(index + 2);

                Elements courseDescription = doc.select(".ntdefault");
                this.description = courseDescription.get(0).text();
                this.subject = subj;
                this.number = numb;
                this.credits = (int) this.parseCRH();
            } else {
                this.subject = null;
                this.description = null;
                this.number = null;
                this.credits = -1;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }






    private double parseCRH(){
        int index = this.description.indexOf("Credit hours");
        String tmp = this.description.substring(0, index-1);
        int first = tmp.lastIndexOf(" ");
        int idx = Math.max(first, 0);
        tmp = tmp.substring(idx).trim();
        return Double.parseDouble(tmp);
    }

    public String getSubject() {
        return subject;
    }

    public String getNumber() {
        return number;
    }

    public String getTitle() {
        return title;
    }

    public String getDescription() {
        return description;
    }

    /**
     * Creates a flattened list of pre-requisites; removes C or D or better information,
     * as well as disjunctive normal form. All structure should be removed from the pre-requisite list
     * @return
     */
    public String[] getPrerequisites() {

        String[] repls = {
                "Course or Test: ",
                "Minimum Grade of C ",
                "Minimum Grade of D ",
                "May not be taken concurrently.",
                "May be taken concurrently.",
                "(",
                ")"
        };

        String[] list = null;
        if (this.description != null) {
            int sindex = this.description.lastIndexOf("Requirements:");
            if (sindex > 0) {
                String subStr = this.description.substring(sindex + 13).trim();
                for (int i = 0; i < repls.length; i++) {
                    subStr = subStr.replace(repls[i], "");
                }
                subStr = subStr.replace("or", ",");
                subStr = subStr.replace("and", ",");
                list = subStr.split(",");
            }
        }
        return list;
    }

    public int getCredits() {
        return credits;
    }

    public String toString(){
        return subject + " " + number + " " + title + "\n" + description;
    }

    public String toString(boolean full){
        if (full)
            return this + this.description;
        else
            return this.toString();
    }

    private static void log(String msg, String... vals) {
        System.out.println(String.format(msg, vals));
    }

}

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM