简体   繁体   中英

Ignore cascade on foreign key update?

To preface, I'm not very experienced with database design. I have a table of hashes and ids. When a group of new hashes are added, each row in the group gets the same id. If any hash within the new group already exists in the database, all hashes in the new group and existing group(s) get a new, shared id (effectively merging ids when hashes are repeated):

-- Insert a new group of hashes; every row of the group carries the same
-- application-supplied $new_id.
-- When an inserted row hits a duplicate key, the existing row is kept and only
-- its repeat_count is incremented; this statement does not change its id.
INSERT INTO hashes 
    (id, hash) 
VALUES 
    ($new_id, ...), ($new_id, ...)
ON DUPLICATE KEY UPDATE 
    repeat_count = repeat_count + 1;

-- Collect the distinct ids currently attached to any of the submitted hashes.
INSERT INTO hashes_lookup SELECT DISTINCT id FROM hashes WHERE hash IN (...);
-- Re-point every hashes row that carries one of the collected ids to $new_id
-- (this is the step that merges the old groups into the new one).
UPDATE hashes JOIN hashes_lookup USING (id) SET id = '$new_id';
-- Empty the scratch table again.
-- NOTE(review): TRUNCATE TABLE causes an implicit commit in MySQL; confirm this
-- sequence is not expected to run atomically inside a transaction.
TRUNCATE TABLE hashes_lookup;

Other tables reference these ids, so that if an id changes, foreign key constraints take care of updating the ids across tables. The issue here, however, is that I can't enforce uniqueness across any of the child tables. If I do, my queries fail with:

Foreign key constraint for table '...', record '...' would lead to a duplicate entry in table '...'

This error makes sense, given the following test case where id and value are a composite unique key:

id | value
---+-------
a  | 1
b  | 2
c  | 1

Then a gets changed to c :

id | value
---+-------
c  | 1
b  | 2
c  | 1

But c,1 already exists.

It would be ideal if there was an ON UPDATE IGNORE CASCADE option, so that if a duplicate row exists, any duplicating inserts are ignored. However, I'm pretty sure the real issue here is my database design, so I am open to any and all suggestions. My current solution is to not enforce uniqueness across child tables, which leads to a lot of redundant rows.

Edit:

-- Asker's current `hashes` table: one row per hash, tagged with a shared id.
CREATE TABLE `hashes` (
 `hash` char(64) NOT NULL,
 -- Shared by every hash of the same group; indexed but NOT unique.
 `id` varchar(128) NOT NULL,
 -- Incremented by the INSERT ... ON DUPLICATE KEY UPDATE shown in the question.
 `repeat_count` int(11) NOT NULL DEFAULT '0',
 `insert_timestamp` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
 `update_timestamp` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
 -- Each hash may appear only once.
 UNIQUE KEY `hash` (`hash`) USING BTREE,
 -- Non-unique index on id; the `emails` foreign key references this column.
 KEY `id` (`id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=latin1

-- Asker's current child table: e-mail addresses attached to a hashes.id value.
CREATE TABLE `emails` (
 `id` varchar(128) NOT NULL,
 `group_id` char(5) NOT NULL,
 `email` varchar(500) NOT NULL,
 KEY `index` (`id`) USING BTREE,
 -- Uniqueness is enforced on (id, group_id, first 255 characters of email).
 -- This is the key that a cascaded id change can violate.
 UNIQUE KEY `id` (`id`,`group_id`,`email`(255)) USING BTREE,
 -- Changes and deletions of hashes.id are cascaded into this table.
 CONSTRAINT `emails_ibfk_1` FOREIGN KEY (`id`) REFERENCES `hashes` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=latin1

I think it would be good to create a table hash_group that stores the id of each hash group:

-- One row per group of related hashes. The surrogate `id` is what the other
-- tables reference; `group_name` is a unique label for the group.
CREATE TABLE `hash_group` (
    `id`               BIGINT       NOT NULL AUTO_INCREMENT,
    `group_name`       VARCHAR(128) NOT NULL,
    `insert_timestamp` TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    `update_timestamp` TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP
                                    ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (`id`) USING BTREE,
    UNIQUE KEY `group_name` (`group_name`) USING BTREE
) ENGINE = InnoDB DEFAULT CHARSET = latin1;

And change the structure of the existing tables:

-- One row per hash. Each hash belongs to exactly one group in `hash_group`.
CREATE TABLE `hashes` (
    `hash`             CHAR(64)  NOT NULL,
    `hash_group_id`    BIGINT    NOT NULL,
    -- Bumped by INSERT ... ON DUPLICATE KEY UPDATE when the hash is seen again.
    `repeat_count`     INT(11)   NOT NULL DEFAULT 0,
    `insert_timestamp` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    `update_timestamp` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
                                 ON UPDATE CURRENT_TIMESTAMP,
    UNIQUE KEY `hash` (`hash`) USING BTREE,
    KEY `hashes_hash_group_id_index` (`hash_group_id`) USING BTREE,
    -- Deleting or renumbering a group is propagated to its hashes.
    CONSTRAINT `hashes_hash_group_id_fk`
        FOREIGN KEY (`hash_group_id`)
        REFERENCES `hash_group` (`id`)
        ON DELETE CASCADE
        ON UPDATE CASCADE
) ENGINE = InnoDB DEFAULT CHARSET = latin1;

-- E-mail addresses attached to a hash group.
CREATE TABLE `emails` (
 `hash_group_id` BIGINT NOT NULL,
 `group_id` char(5) NOT NULL,
 `email` varchar(500) NOT NULL,
 -- Uniqueness is enforced on (hash_group_id, group_id, first 255 characters of email).
 -- Because `hash_group_id` is the leftmost column of this key, it also serves
 -- lookups by `hash_group_id` and backs the foreign key below, so a separate
 -- single-column index on `hash_group_id` would be redundant and is omitted.
 UNIQUE KEY `emails_unique` (`hash_group_id`,`group_id`,`email`(255)) USING BTREE,
 -- Deleting or renumbering a group is propagated to its e-mail rows.
 CONSTRAINT `emails_ibfk_1` FOREIGN KEY (`hash_group_id`) REFERENCES `hash_group` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=latin1;

Also create a trigger to update the hash group's name, if you need one:

DELIMITER $$
-- After any update of a `hashes` row, regenerate the name of the group that the
-- row now belongs to (NEW.hash_group_id).
--
-- The formula is a placeholder: replace it with your own hash. The group id is
-- mixed into the hashed value because `hash_group`.`group_name` is UNIQUE; hashing
-- the timestamp alone would give two different groups touched within the same
-- second the same name and make the triggering UPDATE fail with a duplicate-key
-- error.
CREATE TRIGGER `update_hash_group_name` AFTER UPDATE ON `hashes`
FOR EACH ROW
BEGIN
    UPDATE `hash_group`
    SET `group_name` = MD5(CONCAT(NEW.hash_group_id, ':', NOW()))
    WHERE `id` = NEW.hash_group_id;
END$$
DELIMITER ;

And create a function that returns the group id to use:

-- get_hash_group(id): return the hash group id to use for a batch of hashes.
--   id      an existing `hash_group`.`id`, or NULL when none of the hashes in
--           the batch is known yet.
--   returns `id` unchanged when it is not NULL; otherwise the id of a newly
--           inserted `hash_group` row.
-- Parameter and return type are BIGINT to match `hash_group`.`id`; INT would
-- truncate ids beyond the 32-bit range.
DROP FUNCTION IF EXISTS get_hash_group;

DELIMITER $$
CREATE FUNCTION get_hash_group(id BIGINT) RETURNS BIGINT
BEGIN
  IF id IS NULL THEN
    -- Placeholder name: replace with your own hash formula. UUID() is used as the
    -- seed because `group_name` is UNIQUE and a timestamp-based value would
    -- collide when two groups are created within the same second.
    INSERT INTO `hash_group` (`group_name`)
    VALUES (MD5(UUID()));
    RETURN LAST_INSERT_ID();
  END IF;

  RETURN id;
END$$
DELIMITER ;

Scenario:

Initial fill:

-- Seed data for the walkthrough: three groups ...
INSERT INTO `hash_group` (`id`, `group_name`)
VALUES (1, 'test1'), (2, 'test2'), (3, 'test3');

-- ... three hashes in group 1 and one hash each in groups 2 and 3 ...
INSERT INTO `hashes` (`hash`, `hash_group_id`)
VALUES
    ('hash11', 1), ('hash12', 1), ('hash13', 1),
    ('hash2', 2),
    ('hash3', 3);

-- ... and one e-mail row per group. Groups 2 and 3 deliberately carry the same
-- (group_id, email) pair.
INSERT INTO `emails` (`hash_group_id`, `group_id`, `email`)
VALUES
    (1, 'g1', 'example1@'),
    (2, 'g1', 'example2@'),
    (3, 'g1', 'example2@');

Updating of hash_group scenario:

-- Scratch table holding the ids of the groups that are absorbed by this batch.
-- It is created before the transaction starts so that no DDL runs inside it.
CREATE TEMPORARY TABLE IF NOT EXISTS `merged_hash_group` (
  `id` BIGINT NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB;

START TRANSACTION;

-- Start from an empty scratch table (intentionally no WHERE: every row is stale).
DELETE FROM `merged_hash_group`;

-- Get @min_group_id - the smallest group id among the submitted hashes.
-- This group survives; every other matching group is merged into it.
-- @min_group_id is NULL when none of the hashes exists yet.

SELECT MIN(hash_group_id) INTO @min_group_id
FROM hashes
WHERE hash IN ('hash11', 'hash12', 'hash2', 'hash15');

-- Remember the groups to absorb. The comparison with a NULL @min_group_id
-- matches nothing, so the table stays empty when all hashes are new.

INSERT INTO `merged_hash_group` (`id`)
SELECT DISTINCT hash_group_id
FROM hashes
WHERE hash IN ('hash11', 'hash12', 'hash2', 'hash15')
  AND hash_group_id > @min_group_id;

-- Move the e-mail rows of the absorbed groups to @min_group_id.
-- IGNORE leaves a row untouched when moving it would duplicate a row that the
-- surviving group already has (unique key `emails_unique`); those leftovers are
-- removed by ON DELETE CASCADE when the absorbed groups are deleted below.

UPDATE IGNORE `emails`
SET `hash_group_id` = @min_group_id
WHERE `hash_group_id` IN (SELECT `id` FROM `merged_hash_group`);

-- Move ALL hashes of the absorbed groups to the surviving group, so that hashes
-- which are not part of this batch are not lost to the cascading delete and the
-- repeat_count of the ones that are part of it is preserved.

UPDATE `hashes`
SET `hash_group_id` = @min_group_id
WHERE `hash_group_id` IN (SELECT `id` FROM `merged_hash_group`);

-- Delete the absorbed groups; only the group with @min_group_id is left.

DELETE FROM `hash_group`
WHERE `id` IN (SELECT `id` FROM `merged_hash_group`);

-- @group_id = existing hash_group.id, or a newly created group when
-- @min_group_id is NULL (all inserted hashes are new).

SELECT get_hash_group(@min_group_id) INTO @group_id;

-- Now we can insert the hashes: new ones are added to @group_id, known ones only
-- get their repeat_count incremented.

INSERT INTO `hashes` (hash, hash_group_id) VALUES
('hash11', @group_id),
('hash12', @group_id),
('hash2', @group_id),
('hash15', @group_id)
ON DUPLICATE KEY
UPDATE repeat_count = repeat_count + 1;


COMMIT;

I may be wrong, but I think you misnamed the id field in hashes.

I think you should rename the id field in hashes to something like group_id, and then add an AUTO_INCREMENT field called id that is also the PRIMARY KEY of hashes; the id in emails should refer to this new field instead. When you want to relate hashes to each other, you update the group_id field rather than id, so id remains unique across the table.

This way you avoid the cascade problem, and you always know the original hash that the email referred to. Sure, if you want to fetch all the hashes related to an email (old and new) you must execute an extra query, but I think it solves all your problems.

Edit:
you can use a trigger to do this

The trigger goes like this

DELIMITER $$
-- Propagate a changed `hashes`.`id` to `emails` while ignoring duplicates
-- (a hand-written "ON UPDATE IGNORE CASCADE"). The foreign key between the two
-- tables must be removed for this to work.
CREATE TRIGGER `update_hash_id` AFTER UPDATE ON `hashes`
FOR EACH ROW
BEGIN
    -- Do nothing when the id itself did not change (for example when only
    -- repeat_count was updated). This guard is also what keeps the DELETE below
    -- from removing rows that were never meant to move.
    IF NEW.id <> OLD.id THEN
        -- IGNORE skips rows whose move would violate the unique key on `emails`,
        -- i.e. rows that already exist under NEW.id, instead of failing.
        UPDATE IGNORE `emails` SET `id` = NEW.id WHERE `id` = OLD.id;
        -- Whatever is still stored under OLD.id is a duplicate of a row that
        -- exists under NEW.id, so it can be dropped.
        DELETE FROM `emails` WHERE `id` = OLD.id;
    END IF;
END$$
DELIMITER ;

and you must remove the foreign key relation too.

Adding an extra integer column to each of the child tables would avoid this problem altogether by using it as a primary key. The key never changes because it isn't a reference to anything else.

Using composite keys as primary keys is generally something that you want to avoid. And considering that this key combination is not always unique, I would definitely say you need a dedicated primary key in all of your child tables with this problem.

You can even auto increment it so you aren't manually assigning it every time. For example..

-- Example child table with a dedicated surrogate primary key: `trueID` is
-- assigned automatically and never changes, so the row stays uniquely
-- identifiable whatever happens to the other columns.
CREATE TABLE exampleTable
(
    trueID INT NOT NULL AUTO_INCREMENT,
    col1 INT NOT NULL,
    col2 VARCHAR(50),
    PRIMARY KEY (trueID)
);

Then, when two of the rows in a child table are set with identical values (for whatever reason), the primary key stays unique, preventing any conflicts in the Database that could arise.

The solution we arrived at in chat:

/* Tables */

-- `entities` is created first because `hashes` and `emails` reference it through
-- foreign keys.
-- Each inserted batch gets its own immutable `group_id`; groups that turn out to
-- belong together share an `entity_id`, which is the only value that is ever
-- rewritten when groups are merged.
CREATE TABLE `entities` (
 `group_id` bigint(20) NOT NULL,
 `entity_id` bigint(20) NOT NULL,
 PRIMARY KEY (`group_id`),
 KEY `entity_id` (`entity_id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;

-- One row per hash; `group_id` is the batch that first introduced the hash.
CREATE TABLE `hashes` (
 `group_id` bigint(20) NOT NULL,
 `hash` varchar(128) NOT NULL,
 `repeat_count` int(11) NOT NULL DEFAULT '0',
 UNIQUE KEY `hash` (`hash`),
 KEY `group_id` (`group_id`),
 CONSTRAINT `hashes_ibfk_1` FOREIGN KEY (`group_id`) REFERENCES `entities` (`group_id`) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=latin1;

-- Child data keyed by the immutable `group_id`, so the unique key can never be
-- violated by a merge.
CREATE TABLE `emails` (
 `group_id` bigint(20) NOT NULL,
 `email` varchar(500) NOT NULL,
 UNIQUE KEY `group_id` (`group_id`,`email`) USING BTREE,
 CONSTRAINT `emails_ibfk_1` FOREIGN KEY (`group_id`) REFERENCES `entities` (`group_id`) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=latin1;

-- Scratch table used while merging entity ids. InnoDB is used so that its
-- contents roll back together with the rest of the insert transaction; the
-- former USING HASH hint is dropped because neither MyISAM nor InnoDB builds
-- hash indexes.
CREATE TABLE `entity_lookup` (
 `entity_id` bigint(20) NOT NULL,
 PRIMARY KEY (`entity_id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;

/* Inserting */

START TRANSACTION;

/* Determine next group ID (COALESCE yields 1 while `entities` is still empty,
   where MAX() alone would return NULL) */
/* NOTE(review): MAX() + 1 is not safe against concurrent writers; confirm that
   inserts are serialized, or switch to an AUTO_INCREMENT / locking scheme. */
SET @next_group_id = (SELECT COALESCE(MAX(group_id), 0) + 1 FROM entities);

/* Determine next entity ID */
SET @next_entity_id = (SELECT COALESCE(MAX(entity_id), 0) + 1 FROM entities);

/* Merge any entity ids: collect the entities that already own one of the
   submitted hashes, then move all of their groups to the new entity id */
INSERT IGNORE INTO entity_lookup (entity_id)
SELECT e.entity_id
FROM entities AS e
INNER JOIN hashes AS h
    ON h.group_id = e.group_id
WHERE h.hash IN (...);

UPDATE entities AS e
INNER JOIN entity_lookup AS l
    ON l.entity_id = e.entity_id
SET e.entity_id = @next_entity_id;

/* Empty the scratch table. DELETE (intentionally without WHERE) is used instead
   of TRUNCATE TABLE because TRUNCATE causes an implicit commit, which would end
   the transaction before the inserts below. */
DELETE FROM entity_lookup;

/* Add the new group ID to entity_id */
INSERT INTO entities (group_id, entity_id) VALUES (@next_group_id, @next_entity_id);

/* Add new values into hashes; a hash that already exists keeps its group and
   only gets its repeat_count incremented */
INSERT INTO hashes (group_id, hash) VALUES
    (@next_group_id, ...)
ON DUPLICATE KEY UPDATE
  repeat_count = repeat_count + 1;

/* Add other new values */
INSERT IGNORE INTO emails (group_id, email) VALUES
    (@next_group_id, 'email1');

COMMIT;

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM