SQL query works by itself but breaks with 'unknown column in on clause' in stored procedure

Question

My SELECT query with an inner-joined subquery works when I run it on its own, but fails with the error Unknown column 'cities.state' in 'on clause' when it runs inside a stored procedure. What could cause a query that normally works to break inside a stored procedure? And why would this unknown-column error be reported for cities.state but not for cities.name? I am using MySQL 5.7.

# geogen: recreates the temporary table `geodb`.`geoareas` from the template table,
# fills it from `geodb`.`uszipcode`, and returns its rows.
#   State   - two-character state code compared against `uszipcode`.`state`.
#   CityMin - lower bound (inclusive) on a city's summed population.
#   CityMax - upper bound (inclusive) on a city's summed population.
# NOTE: as written, calling this procedure fails with error 1054,
# "Unknown column 'cities.state' in 'on clause'".
CREATE DEFINER=`root`@`localhost` PROCEDURE `geogen`(IN State CHAR(2), CityMin INT(5), CityMax INT(6))
BEGIN

#Create a temporary table to hold the final GeoArea product
DROP TABLE IF EXISTS `geodb`.`geoareas`;
CREATE TEMPORARY TABLE `geodb`.`geoareas` LIKE `geodb`.`geoareatemplate`;

INSERT INTO `geodb`.`geoareas` (`geoarea`, `zip`, `state`)
# Zip code sets of cities/towns with a population between CityMin and CityMax are their own GeoAreas.
SELECT CONCAT(`uszipcode`.`name`, ' ', `uszipcode`.`state`) as 'geoarea', `uszipcode`.`zip`, `uszipcode`.`state`
FROM `geodb`.`uszipcode`
INNER JOIN
# NOTE(review): the unqualified `state` in the select list below has the same name
# (case-insensitively) as the State parameter. Inside a routine MySQL appears to
# resolve it as the parameter rather than the column, which would explain why
# `cities`.`state` cannot be found in the ON clause - confirm against the MySQL manual.
(SELECT `name`, `state`, SUM(`population`) AS 'Population'
FROM `geodb`.`uszipcode`
# Table-qualified reference on the left, procedure parameter on the right.
WHERE `uszipcode`.`state` = State
GROUP BY `name`
HAVING (SUM(`population`) >= CityMin AND SUM(`population`) <= CityMax)) as `cities`
ON `uszipcode`.`name` = `cities`.`name`
AND `uszipcode`.`state` = `cities`.`state`
ORDER BY `uszipcode`.`name`, `uszipcode`.`zip`;

# Return the generated rows to the caller.
SELECT * FROM geodb.geoareas;
END

When I call the stored procedure with

# Call geogen with State = 'TX', CityMin = 35000 and CityMax = 70000.
CALL geogen('TX', 35000, 70000);

I get the error

0   76  13:10:22    CALL geogen('TX', 35000, 70000) Error Code: 1054. Unknown column 'cities.state' in 'on clause'  0.031 sec

However, when I run the query by itself, results are returned.

# Standalone version of the procedure's query: every zip code that belongs to a
# Texas city whose summed population is between 30000 and 70000 (inclusive).
SELECT
    CONCAT(`zips`.`name`, ' ', `zips`.`state`) AS 'geoarea',
    `zips`.`zip`,
    `zips`.`state`
FROM `geodb`.`uszipcode` AS `zips`
INNER JOIN (
    # One row per qualifying city, with its total population.
    SELECT
        `name`,
        `state`,
        SUM(`population`) AS 'Population'
    FROM `geodb`.`uszipcode`
    WHERE `uszipcode`.`state` = 'TX'
    GROUP BY `name`
    HAVING SUM(`population`) BETWEEN 30000 AND 70000
) AS `cities`
    ON `cities`.`name` = `zips`.`name`
    AND `cities`.`state` = `zips`.`state`
ORDER BY `zips`.`name`, `zips`.`zip`;

Result

LIMIT 0, 1000   121 row(s) returned 0.453 sec / 0.000 sec

Answer 1

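MySQL was confused because the parameter/variable named State has the same name (case-insensitively) as the column state. I originally believed this was a MySQL bug, because I used backticks and explicitly referenced the column by prefacing it with the table name in the ON clause, but please correct me if I am wrong and this is expected behavior. (On closer inspection it does appear to be expected: inside a stored routine, an unqualified identifier that matches a parameter or local variable is resolved as that parameter rather than as a column, and backticks do not change this. The `state` in the subquery's select list was not table-qualified, so it was read as the State parameter and the derived table cities never exposed a real state column — hence Unknown column 'cities.state'. There is no parameter called name, which is why cities.name was unaffected.) The results of the stored procedure matched the results of the standalone query when I renamed the State parameter to VarState as shown below.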

# geogen: fills the temporary table `geodb`.`geoareas` with one row per zip code
# belonging to a city in VarState whose summed population lies between CityMin
# and CityMax (inclusive), then returns those rows ordered by geoarea and zip.
#   VarState - two-character state code matched against `uszipcode`.`state`.
#              It is deliberately not named State: inside a routine an unqualified
#              identifier that matches a parameter is resolved as the parameter,
#              not as the column of the same name.
#   CityMin  - lower bound (inclusive) on a city's summed population.
#   CityMax  - upper bound (inclusive) on a city's summed population.
CREATE DEFINER=`root`@`localhost` PROCEDURE `geogen`(IN VarState CHAR(2), CityMin INT(5), CityMax INT(6))
BEGIN

# Create a temporary table to hold the final GeoArea product.
# DROP TEMPORARY only ever removes a temporary table, so a permanent table that
# happens to be called geoareas is never dropped by this procedure.
DROP TEMPORARY TABLE IF EXISTS `geodb`.`geoareas`;
CREATE TEMPORARY TABLE `geodb`.`geoareas` LIKE `geodb`.`geoareatemplate`;

# Zip code sets of cities/towns with a population between CityMin and CityMax are their own GeoAreas.
INSERT INTO `geodb`.`geoareas` (`geoarea`, `zip`, `state`)
SELECT
    CONCAT(`zips`.`name`, ' ', `zips`.`state`) AS `geoarea`,
    `zips`.`zip`,
    `zips`.`state`
FROM `geodb`.`uszipcode` AS `zips`
INNER JOIN (
    # Every column is table-qualified so that none of them can be mistaken for a
    # routine parameter or local variable, whatever those are named.
    SELECT
        `z`.`name`,
        `z`.`state`,
        SUM(`z`.`population`) AS `Population`
    FROM `geodb`.`uszipcode` AS `z`
    WHERE `z`.`state` = VarState
    # Group by every non-aggregated column; the WHERE clause already restricts
    # state to a single value, so this does not change the groups.
    GROUP BY `z`.`name`, `z`.`state`
    HAVING SUM(`z`.`population`) BETWEEN CityMin AND CityMax
) AS `cities`
    ON `zips`.`name` = `cities`.`name`
    AND `zips`.`state` = `cities`.`state`
ORDER BY `zips`.`name`, `zips`.`zip`;

# Return the generated rows in a deterministic order.
SELECT * FROM `geodb`.`geoareas`
ORDER BY `geoarea`, `zip`;
END

Answer 2

Try wrapping the SELECT in () as follows:

# Runs inside the geogen procedure, where State, CityMin and CityMax are parameters.
# The SELECT is wrapped in parentheses, and every column of the derived table is
# table-qualified: an unqualified `state` would be resolved as the State parameter
# instead of the column, leaving `cities` without a usable state column.
INSERT INTO `geodb`.`geoareas` (`geoarea`, `zip`, `state`)
(SELECT
    CONCAT(`uszipcode`.`name`, ' ', `uszipcode`.`state`) AS `geoarea`,
    `uszipcode`.`zip`,
    `uszipcode`.`state`
FROM `geodb`.`uszipcode`
INNER JOIN (
    # One row per city in the requested state whose summed population is in range.
    SELECT
        `z`.`name`,
        `z`.`state`,
        SUM(`z`.`population`) AS `Population`
    FROM `geodb`.`uszipcode` AS `z`
    WHERE `z`.`state` = State
    GROUP BY `z`.`name`, `z`.`state`
    HAVING SUM(`z`.`population`) BETWEEN CityMin AND CityMax
) AS `cities`
    ON `uszipcode`.`name` = `cities`.`name`
    AND `uszipcode`.`state` = `cities`.`state`
ORDER BY `uszipcode`.`name`, `uszipcode`.`zip`);

Answer 3

try this:

 DELIMITER $$
# geogen: materialises the qualifying cities in a temporary table, joins the zip
# codes against it to fill the temporary table `geodb`.`geoareas`, and returns
# the rows of `geodb`.`geoareas`.
#   State   - two-character state code matched against `uszipcode`.`state`.
#   CityMin - lower bound (inclusive) on a city's summed population.
#   CityMax - upper bound (inclusive) on a city's summed population.
CREATE DEFINER=`root`@`localhost` PROCEDURE `geogen`(IN State CHAR(2), CityMin INT(5), CityMax INT(6))
BEGIN

# Temporary table of cities in the requested state whose summed population is in range.
# DROP TEMPORARY never touches a permanent table of the same name.
DROP TEMPORARY TABLE IF EXISTS `geodb`.`cities_temp`;
CREATE TEMPORARY TABLE `geodb`.`cities_temp`
# Columns are table-qualified on purpose: an unqualified `state` would be resolved
# as the State parameter instead of the column, so `cities_temp` would not get a
# real state column to join on.
SELECT
    `z`.`name`,
    `z`.`state`,
    SUM(`z`.`population`) AS `Population`
FROM `geodb`.`uszipcode` AS `z`
WHERE `z`.`state` = State
# Group by every non-aggregated column; state is already fixed by the WHERE clause.
GROUP BY `z`.`name`, `z`.`state`
HAVING SUM(`z`.`population`) BETWEEN CityMin AND CityMax;

# Create a temporary table to hold the final GeoArea product.
DROP TEMPORARY TABLE IF EXISTS `geodb`.`geoareas`;
CREATE TEMPORARY TABLE `geodb`.`geoareas` LIKE `geodb`.`geoareatemplate`;

# Zip code sets of cities/towns with a population between CityMin and CityMax are their own GeoAreas.
INSERT INTO `geodb`.`geoareas` (`geoarea`, `zip`, `state`)
SELECT
    CONCAT(`uszipcode`.`name`, ' ', `uszipcode`.`state`) AS `geoarea`,
    `uszipcode`.`zip`,
    `uszipcode`.`state`
FROM `geodb`.`uszipcode`
INNER JOIN `geodb`.`cities_temp` AS `cities`
    ON `uszipcode`.`name` = `cities`.`name`
    AND `uszipcode`.`state` = `cities`.`state`
ORDER BY `uszipcode`.`name`, `uszipcode`.`zip`;

# Return the generated rows to the caller.
SELECT * FROM `geodb`.`geoareas`;
END$$

DELIMITER ;

SQL query works by itself but breaks with 'unknown column in on clause' in stored procedure

Question

3 answers

solution1
2 ACCEPTED 2017-02-14 16:47:42

solution2
1 2017-02-13 18:20:54

solution3
1 2017-02-13 18:46:37

SQL query works by itself but breaks with 'unknown column in on clause' in stored procedure

Question

3 answers

solution1 2 ACCEPTED 2017-02-14 16:47:42

solution2 1 2017-02-13 18:20:54

solution3 1 2017-02-13 18:46:37

solution1
2 ACCEPTED 2017-02-14 16:47:42

solution2
1 2017-02-13 18:20:54

solution3
1 2017-02-13 18:46:37