오늘은 Python의 DataFrame을 활용하여 MySQL에 데이터를 넣을 때에, 정의해 둔 unique Key 스키마로 인해 에러가 나는 경우 처리하는 방법에 대해 알아보겠습니다.

우선 발생하는 에러는 아래와 같습니다.

Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1782, in _execute_context
    self.dialect.do_executemany(
  File "/usr/local/lib/python3.9/site-packages/sqlalchemy/dialects/mysql/mysqldb.py", line 193, in do_executemany
    rowcount = cursor.executemany(statement, parameters)
  File "/usr/local/lib/python3.9/site-packages/pymysql/cursors.py", line 173, in executemany
    return self._do_execute_many(
  File "/usr/local/lib/python3.9/site-packages/pymysql/cursors.py", line 211, in _do_execute_many
    rows += self.execute(sql + postfix)
  File "/usr/local/lib/python3.9/site-packages/pymysql/cursors.py", line 148, in execute
    result = self._query(query)
  File "/usr/local/lib/python3.9/site-packages/pymysql/cursors.py", line 310, in _query
    conn.query(q)
  File "/usr/local/lib/python3.9/site-packages/pymysql/connections.py", line 548, in query
    self._affected_rows = self._read_query_result(unbuffered=unbuffered)
  File "/usr/local/lib/python3.9/site-packages/pymysql/connections.py", line 775, in _read_query_result
    result.read()
  File "/usr/local/lib/python3.9/site-packages/pymysql/connections.py", line 1156, in read
    first_packet = self.connection._read_packet()
  File "/usr/local/lib/python3.9/site-packages/pymysql/connections.py", line 725, in _read_packet
    packet.raise_for_error()
  File "/usr/local/lib/python3.9/site-packages/pymysql/protocol.py", line 221, in raise_for_error
    err.raise_mysql_exception(self._data)
  File "/usr/local/lib/python3.9/site-packages/pymysql/err.py", line 143, in raise_mysql_exception
    raise errorclass(errno, errval)
pymysql.err.IntegrityError: (1062, "Duplicate entry '경기도 남양주시 오남읍 오남리-진건오남로 631' for key 'buildings.dist_origin'")

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/Users/gang-yeji/Workspace/zipzup_collector/parser.py", line 144, in <module>
    buildingInfo = updateDabaseFromExcel(
  File "/Users/gang-yeji/Workspace/zipzup_collector/parser.py", line 128, in updateDabaseFromExcel
    updateDatabase(info, tableName)
  File "/Users/gang-yeji/Workspace/zipzup_collector/parser.py", line 121, in updateDatabase
    info.to_sql(name=tableName, con=conn,
  File "/usr/local/lib/python3.9/site-packages/pandas/core/generic.py", line 2963, in to_sql
    return sql.to_sql(
  File "/usr/local/lib/python3.9/site-packages/pandas/io/sql.py", line 697, in to_sql
    return pandas_sql.to_sql(
  File "/usr/local/lib/python3.9/site-packages/pandas/io/sql.py", line 1736, in to_sql
    total_inserted = sql_engine.insert_records(
  File "/usr/local/lib/python3.9/site-packages/pandas/io/sql.py", line 1329, in insert_records
    raise err
  File "/usr/local/lib/python3.9/site-packages/pandas/io/sql.py", line 1319, in insert_records
    return table.insert(chunksize=chunksize, method=method)
  File "/usr/local/lib/python3.9/site-packages/pandas/io/sql.py", line 947, in insert
    num_inserted = exec_insert(conn, keys, chunk_iter)
  File "/usr/local/lib/python3.9/site-packages/pandas/io/sql.py", line 854, in _execute_insert
    result = conn.execute(self.table.insert(), data)
  File "/usr/local/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1289, in execute
    return meth(self, multiparams, params, _EMPTY_EXECUTION_OPTS)
  File "/usr/local/lib/python3.9/site-packages/sqlalchemy/sql/elements.py", line 325, in _execute_on_connection
    return connection._execute_clauseelement(
  File "/usr/local/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1481, in _execute_clauseelement
    ret = self._execute_context(
  File "/usr/local/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1845, in _execute_context
    self._handle_dbapi_exception(
  File "/usr/local/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 2026, in _handle_dbapi_exception
    util.raise_(
  File "/usr/local/lib/python3.9/site-packages/sqlalchemy/util/compat.py", line 207, in raise_
    raise exception
  File "/usr/local/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1782, in _execute_context
    self.dialect.do_executemany(
  File "/usr/local/lib/python3.9/site-packages/sqlalchemy/dialects/mysql/mysqldb.py", line 193, in do_executemany
    rowcount = cursor.executemany(statement, parameters)
  File "/usr/local/lib/python3.9/site-packages/pymysql/cursors.py", line 173, in executemany
    return self._do_execute_many(
  File "/usr/local/lib/python3.9/site-packages/pymysql/cursors.py", line 211, in _do_execute_many
    rows += self.execute(sql + postfix)
  File "/usr/local/lib/python3.9/site-packages/pymysql/cursors.py", line 148, in execute
    result = self._query(query)
  File "/usr/local/lib/python3.9/site-packages/pymysql/cursors.py", line 310, in _query
    conn.query(q)
  File "/usr/local/lib/python3.9/site-packages/pymysql/connections.py", line 548, in query
    self._affected_rows = self._read_query_result(unbuffered=unbuffered)
  File "/usr/local/lib/python3.9/site-packages/pymysql/connections.py", line 775, in _read_query_result
    result.read()
  File "/usr/local/lib/python3.9/site-packages/pymysql/connections.py", line 1156, in read
    first_packet = self.connection._read_packet()
  File "/usr/local/lib/python3.9/site-packages/pymysql/connections.py", line 725, in _read_packet
    packet.raise_for_error()
  File "/usr/local/lib/python3.9/site-packages/pymysql/protocol.py", line 221, in raise_for_error
    err.raise_mysql_exception(self._data)
  File "/usr/local/lib/python3.9/site-packages/pymysql/err.py", line 143, in raise_mysql_exception
    raise errorclass(errno, errval)
sqlalchemy.exc.IntegrityError: (pymysql.err.IntegrityError) (1062, "Duplicate entry '경기도 남양주시 오남읍 오남리-진건오남로 631' for key 'buildings.dist_origin'")

 

 

 

에러가 참 기네요.

다양한 솔루션을 한 번 찾아보았는데요.
제가 찾아본 방법 중에 가장 간단한 방법은 아래와 같습니다.

def updateDatabase(info, tableName):
    try:
        tempTableName = tableName + "Temp"
        info.to_sql(name=tempTableName, con=conn,
                    index=False, if_exists='replace')
        columns = table_column_names(tableName)
        insert_query = f'INSERT IGNORE INTO {tableName}({columns}) SELECT {columns} FROM `{tempTableName}`'
        db_connection.execute(insert_query)
        print("Successfully " + tableName + " updated")
    except Exception as e:
        print(e)

먼저 정제한 DataFrame을 테이블 이름과 중복되지 않게 변경하여(tempTableName) DB에 올립니다.
그 다음, DB문을 통해 임시로 지정한 테이블(tempTableName)의 전체를 원래 저장하려고 했던 테이블(tableName)에 이동시킵니다.
이 때, 에러가 발생한 데이터들은 무시하고 진행하게 됩니다.

반응형
복사했습니다!