简体   繁体   中英

URL breaking of at space in my shell script

#!/bin/sh

scriptname=$0   # $0 is the name of the program
verbose=no
probeonly=no

usage () {
   cat <<EOF
Usage: $scriptname [-o] [-a] [-d] [-c] [-r] [-p] [-h] [-w] movie name
   -a   Lead actor and Actress
   -c   Cast
   -d   Duration
   -h   Help
   -H   History of previous searches by the user
   -o   Overview(Actor,Actress,Ratings,Director and Plot)
   -p   Download the poster
   -r   Rating
   -w   Movies to be released this week

EOF
   exit 0
}
getUrl()
{
    url_first_part="http://www.imdb.com/find?ref_=nv_sr_fn&q="
    url_last_part="&s=all"
    url="${url_first_part}${OPTARG}${url_last_part}"
    echo "${url}"
    content=$(wget ${url} -q -O ~/movie_list) #to save the page source in a local file called movie_list
    count=$(grep -Po -m 1 "(?<=td class=\"primary_photo\"> <a href=\").*?(?=\s*ref_=fn_al_tt_1\")" ~/movie_list|head -1) 
    part_to_be_added="ref_=fn_al_tt_1"
    final_url="www.imdb.com$count$part_to_be_added"
    echo $final_url
    rm ~/movie_list
}
print()
{
    echo "$movie"
}

unset flag #this is to unset the value of flag if it had any value previously
while getopts ":a:c:d:hHo:p:r:w" opt
do
    case $opt in
        a)
          movie="${OPTARG}"
          #echo "-a was triggered, Parameter: $OPTARG" >&2
          getUrl
          flag='1' #we set flag to 1 so as to check if an option was pased or not.Since it skips the getopt part if no option was passed
          ;;
        c)
              getUrl
          #echo "-c was triggered, Parameter: $OPTARG" >&2
          flag='1'
          ;;
            d)
              getUrl
          #echo "-d was triggered, Parameter: $OPTARG" >&2
              flag='1'
          ;;
            h)usage
              flag='1'
          ;;
            H)
              getUrl
          #echo "-H was triggered, Parameter: $OPTARG" >&2
              flag='1'
          ;;
            o)
              getUrl
          #echo "-o was triggered, Parameter: $OPTARG" >&2
              flag='1'
          ;;
            p)
              getUrl
          #echo "-p was triggered, Parameter: $OPTARG" >&2
              flag='1'
          ;;
            r)
              getUrl
          #echo "-r was triggered, Parameter: $OPTARG" >&2
              flag='1'
          ;;
            w)
              getUrl
          echo "-w was triggered, Parameter: $OPTARG" >&2
              flag='1'
          ;;
        \?)
          echo "Invalid option: -$OPTARG" >&2
              flag='1'
          usage
          ;;
        :)
          echo "Option -$OPTARG requires an argument." >&2
              usage
          flag='1'
          ;;
      esac
done
if [ -z "$flag" ] #Here we check if an option was passed or not
then
    echo "No options were passed"
    usage
fi

For some reason,my url is splitting up whenever I enter an otion having more than one word in quotes.If I enter pirates of the carribean,even though the url prints it to be http://www.imdb.com/find?ref_=nv_sr_fn&q=pirates of carribean&s=all.The site that wget goes to is http://www.imdb.com/find?ref_=nv_sr_fn&q=pirates . I am kinda new to unix.Please do help me out with this.I couldnt debug.

you protected pretty much every string against spaces but not here:

content=$(wget ${url} -q -O ~/movie_list)

Should be:

content=$(wget "${url}" -q -O ~/movie_list)

And BTW you'd rather use /tmp for your temporary files (rather than your home directory)

There are a few things wrong. At the scripting level, you need quotes around ${url} :

content=$(wget "${url}" -q -O ~/movie_list) #to save the page source in a local file called movie_list

This prevents the spaces from causing the URL to be sent as multiple parameters to wget .

Another option, which can be combined with the above, is to properly form the URLs. Replace those spaces with either + or %20 :

url="${url_first_part}$(perl -pe 'chomp;s/ /+/g; s/([^A-Za-z0-9\+-])/sprintf("%%%02X", ord($1))/seg;' <<<"${OPTARG}" )${url_last_part}"

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM