Thursday, December 8, 2016

[google-cloud-sql-discuss] Re: Unix Socket sometimes not present

import pipeline
import random
import time
import uuid
import logging
import sqlalchemy
from sqlalchemy import orm
from sqlalchemy.orm import aliased
from sqlalchemy.sql import expression
from sqlalchemy.exc import OperationalError
from sqlalchemy.ext import declarative
from sqlalchemy.pool import NullPool


# Module-level logger.  Use __name__ (the dotted module path) rather than
# __file__ so the logger participates in the normal logging hierarchy
# instead of being keyed by a filesystem path.
LOG = logging.getLogger(__name__)
# Declarative base shared by the ORM models below.
BASE = declarative.declarative_base()
# Fan-out factors: PRODUCT_A top-level pipelines each spawn PRODUCT_B
# children, i.e. 100 * 100 = 10,000 leaf tasks generating SQL load.
PRODUCT_A = 100
PRODUCT_B = 100


# For clarity and ease of reproduction, I have moved most functions and classes into one file

'''
Example Run:
# Modify the connection parameters below to match your settings.
db = getDb()
createTables(db.engine) # First run only
trace = str(uuid.uuid4()) # Makes finding log messages easier
pipe = SqlConnectionProductA(trace)
pipe.start()
'''


#### Begin database functions

# FIXME: Add your own credentials
def getDb():
    """Build and return a DbContainer for the Cloud SQL instance.

    Edit ``connectionParameters`` below with your own database name, user
    and Cloud SQL instance name before running.
    """
    # This fetches our appengine specific information, you should create your own
    # connectionParameters, connectionString, poolclass = SqlHelper.getDatabaseConnectionInfo()

    # For clarity, this is the same connection string used to connect
    connectionString = 'mysql+mysqldb://{user}@/{db}?unix_socket=/cloudsql/{dbInstanceName}'

    # Placeholder values -- replace with your real settings.  The original
    # snippet referenced ``connectionParameters`` without ever defining it
    # (it only appeared in a commented-out line and a docstring example),
    # which raised NameError; defining it here keeps the example runnable.
    connectionParameters = {
        "db": 'mydb',
        "user": 'myuser',
        "dbInstanceName": 'myinstance',
    }

    return getDbContainer(connectionParameters, connectionString)

class DbContainer(object):
    """Pairs a SQLAlchemy engine with its currently-held connection."""

    def __init__(self, engine, connection):
        self.engine = engine
        self.connection = connection

    def reconnect(self):
        """Replace the held connection with a fresh one from the engine.

        On OperationalError the failure is logged and the old connection
        is left in place.
        """
        try:
            fresh = self.engine.connect()
        except OperationalError:
            LOG.exception("Failed to reconnect.")
        else:
            self.connection = fresh


class SessionTransaction(object):
    """Context manager yielding an ORM session bound to ``db.engine``.

    On exit the session is committed; an OperationalError raised by the
    commit is rolled back instead of propagating.  The session is closed
    afterwards in either case.
    """

    def __init__(self, db=None):
        if db is None:
            raise Exception("No sql session, cannot proceed")
        factory = orm.sessionmaker(bind=db.engine)
        self.session = factory()

    def __enter__(self):
        return self.session

    def __exit__(self, type, value, tb):
        try:
            self.session.commit()
        except OperationalError:
            self.session.rollback()

        self.session.close()


def getEngine(connectionParameters, connectionString):
    """Create a SQLAlchemy engine with pooling disabled (NullPool).

    ``connectionString`` is a format template filled from the
    ``connectionParameters`` dict.
    """
    url = connectionString.format(**connectionParameters)
    return sqlalchemy.create_engine(url, poolclass=NullPool)


def getDbContainer(connectionParameters, connectionString):
    """Connect to the database with retries and return a DbContainer.

    Tries up to ``maxAttempts`` times, sleeping 4 seconds between
    attempts.  When every attempt fails, the final OperationalError is
    logged with its traceback and re-raised.
    """
    engine = getEngine(connectionParameters, connectionString)
    maxAttempts = 3
    dbConnection = None
    for attempt in xrange(1, maxAttempts + 1):
        try:
            dbConnection = engine.connect()
            break
        except OperationalError:
            if attempt == maxAttempts:
                # Out of retries: log with traceback and propagate.  The
                # original also slept 4s here before raising, delaying the
                # failure for no benefit.
                LOG.exception("Max reconnect attempts reached.")
                raise
            # warning() replaces the deprecated warn() alias; lazy %-args
            # defer formatting until the record is actually emitted.
            LOG.warning("Failed to connect parameters %s. Retrying... %s of %s",
                        connectionString.format(**connectionParameters),
                        attempt, maxAttempts)
            time.sleep(4)

    return DbContainer(engine, dbConnection)


class ThrashingParentTable(BASE):
    # Destination table: SqlConnectionTester copies rows from ThrashingTable
    # into this table via an INSERT ... SELECT.
    __tablename__ = 'ThrashingParentTable'
    id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
    # uuid tags all rows produced by a single SqlConnectionTester run
    uuid = sqlalchemy.Column(sqlalchemy.String(150))
    marker1 = sqlalchemy.Column(sqlalchemy.Integer, nullable=False)
    marker2 = sqlalchemy.Column(sqlalchemy.Integer, nullable=False)


# A model representing a temporary table used in a metadata update query
class ThrashingTable(BASE):
    __tablename__ = 'ThrashingTable'
    id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
    # uuid tags all rows produced by a single SqlConnectionTester run
    uuid = sqlalchemy.Column(sqlalchemy.String(150))
    marker1 = sqlalchemy.Column(sqlalchemy.Integer, nullable=False)
    marker2 = sqlalchemy.Column(sqlalchemy.Integer, nullable=False)
    # Scopes each task's insert/delete so concurrent tasks don't collide
    transaction_id = sqlalchemy.Column(sqlalchemy.String(150))


def createTables(engine):
    """Create both thrash tables on *engine* (first run only)."""
    for model in (ThrashingTable, ThrashingParentTable):
        model.__table__.create(bind=engine)


#### End database functions

#### Begin Pipeline classes
class SqlConnectionProductA(pipeline.Pipeline):
    """Top-level fan-out: spawns PRODUCT_A child pipelines."""

    def run(self, trace):
        for _ in xrange(PRODUCT_A):
            yield SqlConnectionProductB(trace)


class SqlConnectionProductB(pipeline.Pipeline):
    """Second-level fan-out: spawns PRODUCT_B connection-tester tasks."""

    def run(self, trace):
        for _ in xrange(PRODUCT_B):
            yield SqlConnectionTester(trace)


class SqlConnectionTester(pipeline.Pipeline):
    # Leaf pipeline: generates database load by inserting 50 rows, copying
    # them into the parent table via INSERT ... SELECT, then deleting them.

    def run(self, trace):
        # Unique markers for this run: ``key`` tags the rows and
        # ``transaction_id`` scopes the insert/delete to this task only.
        key = str(uuid.uuid4())
        transaction_id = str(uuid.uuid4())
        rows = []
        # Create some data that can be inserted
        for _ in xrange(0, 50):
            rows.append(ThrashingTable(
                uuid=key,
                marker1=random.randint(0, 25),
                marker2=random.randint(1481000000, 1481220141),
                transaction_id=transaction_id
            ))

        # Each task opens its own connection (NullPool -- see getEngine),
        # which is what exercises the unix-socket connect path under load.
        db = getDb()

        '''
        This is a little contrived, but close to what I was doing upon initially seeing the problem.
        Essentially all you need are some transactions that cause the database to do
        some real work. I've since simplified my sql, but they still do *some* work and
        it seems enough to induce the problem. This just exacerbates it.
        '''
        with SessionTransaction(db) as session:
            # Insert some rows into our thrash table
            session.bulk_save_objects(rows)
            session.flush()
            thrashTable = aliased(ThrashingTable)

            # NOTE(review): this join only compares thrashTable.uuid to
            # ``key`` and does not correlate with ThrashingParentTable --
            # presumably fine here, since the goal is just to make MySQL do
            # real work, but verify before reusing the query elsewhere.
            subquery = session.query(ThrashingParentTable)\
                .join(thrashTable, thrashTable.uuid == key)

            # Select this run's rows, labeled with the parent table's column
            # names so they can feed the INSERT ... SELECT below.
            q = session.query(thrashTable.uuid.label(ThrashingParentTable.uuid.name),
                              thrashTable.marker1.label(ThrashingParentTable.marker1.name),
                              thrashTable.marker2.label(ThrashingParentTable.marker2.name)) \
                .filter(~subquery.exists()) \
                .filter(thrashTable.transaction_id == transaction_id)

            # Insert tables to the parent table, if they don't already exists
            insert = expression.insert(ThrashingParentTable) \
                .from_select([ThrashingParentTable.uuid,
                              ThrashingParentTable.marker1,
                              ThrashingParentTable.marker2],
                             q)

            session.execute(insert)

            # Clean up this run's scratch rows.
            session.query(ThrashingTable)\
                .filter(ThrashingTable.transaction_id == transaction_id)\
                .delete()


#### End pipeline classes

Hi Nick,

I appreciate you getting back to me.

I've attached an example that produces the behavior; through testing I've come to understand a little better what's going on. Once a certain request rate is reached, while MySQL is processing under load, the error is returned. Originally I created an example that just sent "SELECT 1" to MySQL and could not induce the error, so MySQL has to be under some load.

The example I've attached induces the error, though there are two classes of errors:

1. The "No such file" error I've originally reported
2. The understandable Deadlock error from mysql

I'm not concerned about #2 as that's just a SQL optimization I've already taken care of, but I still receive the "No such file" error in my production code without a single deadlock error.

"The patterns and frequencies of connections on your instances"

I have a taskqueue that is set up to process all my SQL connections, so that (1) requests aren't hampered by SQL operations, and (2) we can rate-limit the interactions with MySQL. My autoscale settings are set to restrict to 6 connections; due to the 12-connection limit, I wanted to give it some room.

"The way in which you've determined it's isolated to a given instance"

I don't think I was clear when I described this, once an instance starts exhibiting this symptom, subsequent requests to the instance all exhibit the symptom. I determined this just by checking the instance id in the log messages and through restricting to a given instance id.

Hopefully my attached example will enable reproduction. It's a little messy because I consolidated several files into one file for ease of transport, but it should be fairly straightforward.

Let me know if you need me to file this away to the issue tracker you mentioned, I'm happy to do so.

Cheers,

Myles

On Tuesday, December 6, 2016 at 1:44:54 PM UTC-7, paynen wrote:
Hey Myles,

An issue like this would be best reported in the Public Issue Tracker for Cloud SQL.

Nonetheless, we can continue to work on it here until we can determine more accurately what should go in the report. There are some more pieces of information that could be relevant here:

* The patterns and frequencies of connections on your instances 

* The way in which you've determined it's isolated to a given instance

* A description of the pipeline task you're performing so we can attempt to reproduce the issue that way

Feel free to add any more information you think could help in reproducing this issue.

Regards,

Nick
Cloud Platform Community Support

On Monday, December 5, 2016 at 1:02:56 PM UTC-5, Myles Bostwick wrote:
Hello All,

I've been playing around with the pipelines library (https://github.com/GoogleCloudPlatform/appengine-pipelines) in my standard appengine environment and managed to get enough parallel instances running to cause problems. Sometimes, persistent to an instance it seems, I don't have a unix socket (as seen in the below stacktrace). Things I have tried to mitigate this:

  1. On failed connect, try 3 times with 4 seconds between attempts (since I have a 60 second max request time, this seems HUGE)
  2. Reducing concurrent requests per instance to 8
  3. Reducing max concurrent requests to 20
  4. Upgrading from First Generation to Second Generation Cloud SQL instances
  5. Restricting to a single instance (which does "solve" the problem, but doesn't meet my throughput goals)
I've consistently been able to reproduce this issue, in its inconsistent nature. My final recourse is going to be to wait the 3 times, then connect via IP instead. Any thoughts or suggestions in addition to this would be greatly appreciated.

Thanks in advance,

Myles


...
  File "/base/data/home/apps/s~hydrovu-dev/api:2-0-0.397476056640579045/lib/sqlalchemy/engine/base.py", line 1778, in connect return self._connection_cls(self, **kwargs) File "/base/data/home/apps/s~hydrovu-dev/api:2-0-0.397476056640579045/lib/sqlalchemy/engine/base.py", line 60, in __init__ self.__connection = connection or engine.raw_connection() File "/base/data/home/apps/s~hydrovu-dev/api:2-0-0.397476056640579045/lib/sqlalchemy/engine/base.py", line 1847, in raw_connection return self.pool.unique_connection() File "/base/data/home/apps/s~hydrovu-dev/api:2-0-0.397476056640579045/lib/sqlalchemy/pool.py", line 280, in unique_connection return _ConnectionFairy._checkout(self) File "/base/data/home/apps/s~hydrovu-dev/api:2-0-0.397476056640579045/lib/sqlalchemy/pool.py", line 644, in _checkout fairy = _ConnectionRecord.checkout(pool) File "/base/data/home/apps/s~hydrovu-dev/api:2-0-0.397476056640579045/lib/sqlalchemy/pool.py", line 440, in checkout rec = pool._do_get() File "/base/data/home/apps/s~hydrovu-dev/api:2-0-0.397476056640579045/lib/sqlalchemy/pool.py", line 1057, in _do_get return self._create_connection() File "/base/data/home/apps/s~hydrovu-dev/api:2-0-0.397476056640579045/lib/sqlalchemy/pool.py", line 285, in _create_connection return _ConnectionRecord(self) File "/base/data/home/apps/s~hydrovu-dev/api:2-0-0.397476056640579045/lib/sqlalchemy/pool.py", line 411, in __init__ self.connection = self.__connect() File "/base/data/home/apps/s~hydrovu-dev/api:2-0-0.397476056640579045/lib/sqlalchemy/pool.py", line 538, in __connect connection = self.__pool._creator() File "/base/data/home/apps/s~hydrovu-dev/api:2-0-0.397476056640579045/lib/sqlalchemy/engine/strategies.py", line 96, in connect connection_invalidated=invalidated File "/base/data/home/apps/s~hydrovu-dev/api:2-0-0.397476056640579045/lib/sqlalchemy/util/compat.py", line 199, in raise_from_cause reraise(type(exception), exception, tb=exc_tb) File 
"/base/data/home/apps/s~hydrovu-dev/api:2-0-0.397476056640579045/lib/sqlalchemy/engine/strategies.py", line 90, in connect return dialect.connect(*cargs, **cparams) File "/base/data/home/apps/s~hydrovu-dev/api:2-0-0.397476056640579045/lib/sqlalchemy/engine/default.py", line 377, in connect return self.dbapi.connect(*cargs, **cparams) File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/MySQLdb-1.2.4b4/MySQLdb/__init__.py", line 81, in Connect return Connection(*args, **kwargs) File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/MySQLdb-1.2.4b4/MySQLdb/connections.py", line 190, in __init__ super(Connection, self).__init__(*args, **kwargs2) OperationalError: (OperationalError) (2062, 'Cloud SQL socket open failed with error: No such file or directory') None None

--
You received this message because you are subscribed to the Google Groups "Google Cloud SQL discuss" group.
To unsubscribe from this group and stop receiving emails from it, send an email to google-cloud-sql-discuss+unsubscribe@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/google-cloud-sql-discuss/2042a095-d425-49ae-9ad7-b16a31c5ceb6%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

No comments:

Post a Comment