Have a question about CiviCRM? Get it answered quickly at the new CiviCRM Stack Exchange Q+A siteThis forum was archived on 25 November 2017. Learn more.How to get involved.What to do if you think you've found a bug.
/* remove +61 from long numbers: */UPDATE `civicrm_phone`SET phone=replace(phone,'+61','')WHERE phone LIKE '%+61%' AND length(phone) > 11;/* Replace letter O with number 0, strip spaces, brackets, -.,` sign: */UPDATE `civicrm_phone`SET phone=trim(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace( phone,'O','0'),'o','0'),' ',''),'`',''),'*',''),',',''),'.',''),'[',''),']',''),'(',''),')',''),'-',''))WHERE length(phone) BETWEEN 8 AND 16;/* strip leading 61 */UPDATE `civicrm_phone`SET phone= substr(phone,3)WHERE phone LIKE '61%' AND length(phone) IN (11,12); /*Should only be 11, but many invalid 610 numbers *//* temporarily strip leading 0's off valid numbers */UPDATE `civicrm_phone`SET phone= substr(phone,2)WHERE phone LIKE '0%' AND length(phone) = 10;/* attempt to supply missing area codes from first digit of post code */UPDATE civicrm_phone AS p, civicrm_address AS aSET p.phone=concat(CASE substr(a.postal_code, 1, 1) WHEN 2 THEN 2 WHEN 3 THEN 3 WHEN 4 THEN 7 WHEN 5 THEN 8 WHEN 6 THEN 8 WHEN 7 THEN 3 WHEN 8 THEN 8 END, p.phone)WHERE length(phone) = 8 AND a.contact_id = p.contact_id AND substr(postal_code, 1, 1) BETWEEN 2 AND 8;/* attempt to supply missing area codes from state_province_id */UPDATE civicrm_phone AS p, civicrm_address AS aSET p.phone=concat(CASE state_province_id WHEN 1639 THEN 8 WHEN 1640 THEN 2 WHEN 1641 THEN 7 WHEN 1642 THEN 8 WHEN 1643 THEN 3 WHEN 1644 THEN 3 WHEN 1645 THEN 8 END, p.phone)WHERE length(phone) = 8 AND a.contact_id = p.contact_id AND state_province_id BETWEEN 1639 AND 1645;/* attempt to supply missing area codes from civicrm state group */UPDATE civicrm_phone AS p, civicrm_group_contact AS gcSET p.phone=concat(CASE gc.group_id WHEN 116 THEN 3 /* vic */ WHEN 117 THEN 2 /* act */ WHEN 118 THEN 8 /* nt */ WHEN 119 THEN 2 /* nsw */ WHEN 120 THEN 7 /* qld */ WHEN 122 THEN 8 /* sa */ WHEN 123 THEN 3 /* tas */ WHEN 124 THEN 8 /* wa */ WHEN 494 THEN 3 /* vic */END, p.phone)WHERE length(phone) = 8 AND gc.group_id in (116,117,118,119,120,122,123,124,494) /*ignore 115 AG and 121 fedmps */ AND gc.status='Added' AND gc.contact_id=p.contact_id;/* put back the spaces in 1800 and 1300 numbers: */UPDATE `civicrm_phone`SET phone=CONCAT(SUBSTR(phone,1,4),' ',SUBSTR(phone,5,3),' ',SUBSTR(phone,8,3))WHERE phone LIKE '1_00%' AND length(phone) = 10;/* put back the spaces and area codes on correctly constructed mobile and home numbers: */UPDATE `civicrm_phone`SET phone=IF(phone LIKE '4%', CONCAT('0',SUBSTR(phone,1,3),' ',SUBSTR(phone,4,3),' ',SUBSTR(phone,7,3)), CONCAT('0',SUBSTR(phone,1,1),' ',SUBSTR(phone,2,4),' ',SUBSTR(phone,6,4)))WHERE phone BETWEEN 100000000 AND 999999999 AND length(phone) = 9;
- if the dedupe rule can assume pre-tidied phone numbers then its going to be a much faster job
/** * Access the information_schema to pull out all fields with contacts * * This function supports deduping by providing a complete list * of all tables in the schema - even if CiviCRM does not know about them! * It includes all table.collumn containing contact_id or entity_id, except * custom civitable relating to a non-contact id * // fixme - assumes all data is found in a single schema * @param boolean $clear_cache maybe useful if adding tables or testing * @uses $_SESSION to cache results * @return array of (schema_name).(table name).(key field) */ static public function getContactKeyFields($clear_cache = false) { // check for cached table list: if ($clear_cache || !array_key_exists('_agc.dedupefields',$_SESSION)) { $schema = 'au_drupal'; //fixme this should look up correct setting // 1 find tables with 'contact_id' as a field $sql = <<<sql SELECT concat( `TABLE_SCHEMA`, '.', `TABLE_NAME`, '.', `COLUMN_NAME`) as value FROM information_schema.COLUMNS WHERE `COLUMN_NAME` LIKE '%contact_id%' AND `TABLE_SCHEMA` = '$schema'sql; // 2 add in tables with 'entity_id' as a field $FieldList[1] = AgcDedupe::sqlToArray($sql); $sql = <<<sql SELECT concat( `TABLE_SCHEMA`, '.', `TABLE_NAME`, '.', `COLUMN_NAME`) as value FROM information_schema.COLUMNS WHERE `COLUMN_NAME` LIKE '%entity_id%' AND `TABLE_SCHEMA` = '$schema'sql; $FieldList[2] = AgcDedupe::sqlToArray($sql); // 3 remove civi custom tables where entity_id is not a contact $sql = <<<sql SELECT concat('$schema.',`table_name`, '.entity_id') as value FROM au_drupal.civicrm_custom_group WHERE `extends` IN ('Location','Address','Contribution','Activity', 'Relationship','Group', 'Membership','Participant','Event','Grant','Pledge','Case')sql; $FieldList[3] = AgcDedupe::sqlToArray($sql); $_SESSION['_agc.dedupefields'] = // = 1 + (2-3) $FieldList[1] + array_diff($FieldList[2], $FieldList[3]); } return $_SESSION['_agc.dedupefields']; } /** * Produce a list of columns where a given contact_id is found * * @uses getFieldList() to identify field to search in db schema * @return array with (table name).(key field) */ static public function locate($contact_id) { $keys = AgcDedupe::getContactKeyFields(true); $locations = array(); // places $contact_id is found foreach ($keys as $key) { $split = explode('.',$key); $table = $split[0].'.'.$split[1]; // = schema.table $field = $split[2]; // = field_name $sql = "SELECT count(*) AS n FROM $table WHERE $field=".(int)$contact_id.';'; $res=db_query($sql); $db_row = db_fetch_object($res); $n = $db_row->n; if ($n) { // contact found! $locations[] = $key; } } return $locations; }
au_drupal.civicrm_activity.source_contact_id au_drupal.civicrm_activity_target.target_contact_id au_drupal.civicrm_email.contact_id au_drupal.civicrm_group_contact.contact_id au_drupal.civicrm_group_contact_cache.contact_id au_drupal.civicrm_mailing_event_queue.contact_id au_drupal.civicrm_participant.contact_id au_drupal.civicrm_subscription_history.contact_id
IFNULL(ca.$column,'') NOT LIKE CONCAT(IFNULL(cb.$column,''),'%')
IFNULL(ca.$column,'') NOT LIKE CONCAT(IFNULL(cb.$column,''),'%') AND UPPER(SUBSTR(ca.$column,1,1))<>UPPER(cb.$column)