Merge "StringUtils: Add a utility for checking if a string is a valid regex"
[lhc/web/wiklou.git] / includes / utils / AvroValidator.php
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 */
20
21 /**
22 * Generate error strings for data that doesn't match the specified
23 * Avro schema. This is very similar to AvroSchema::is_valid_datum(),
24 * but returns error messages instead of a boolean.
25 *
26 * @since 1.26
27 * @author Erik Bernhardson <ebernhardson@wikimedia.org>
28 * @copyright © 2015 Erik Bernhardson and Wikimedia Foundation.
29 */
class AvroValidator {
	/**
	 * Validate $datum against $schema and describe every mismatch found.
	 *
	 * Primitive schemas yield a single error string. Composite schemas
	 * (array, map, union, record) are validated recursively and yield an
	 * array of the errors reported for their members.
	 *
	 * The wording of the error messages (including the historical
	 * "recieved" spelling) is intentionally left untouched, because
	 * callers may compare against these strings.
	 *
	 * @param AvroSchema $schema The rules to conform to.
	 * @param mixed $datum The value to validate against $schema.
	 * @return string|array An error string, or a (possibly nested) array
	 *  of errors found in the provided $datum. When no errors exist the
	 *  empty array is returned.
	 * @suppress PhanUndeclaredMethod,PhanUndeclaredProperty
	 */
	public static function getErrors( AvroSchema $schema, $datum ) {
		switch ( $schema->type ) {
			case AvroSchema::NULL_TYPE:
				return $datum === null ? [] : self::wrongType( 'null', $datum );

			case AvroSchema::BOOLEAN_TYPE:
				return is_bool( $datum ) ? [] : self::wrongType( 'boolean', $datum );

			case AvroSchema::STRING_TYPE:
			case AvroSchema::BYTES_TYPE:
				return is_string( $datum ) ? [] : self::wrongType( 'string', $datum );

			case AvroSchema::INT_TYPE:
				return self::getIntegerErrors(
					$datum,
					AvroSchema::INT_MIN_VALUE,
					AvroSchema::INT_MAX_VALUE
				);

			case AvroSchema::LONG_TYPE:
				return self::getIntegerErrors(
					$datum,
					AvroSchema::LONG_MIN_VALUE,
					AvroSchema::LONG_MAX_VALUE
				);

			case AvroSchema::FLOAT_TYPE:
			case AvroSchema::DOUBLE_TYPE:
				// Integers are accepted wherever a floating point value is expected.
				return ( is_float( $datum ) || is_int( $datum ) )
					? []
					: self::wrongType( 'float or integer', $datum );

			case AvroSchema::ARRAY_SCHEMA:
				if ( !is_array( $datum ) ) {
					return self::wrongType( 'array', $datum );
				}
				$errors = [];
				foreach ( $datum as $d ) {
					$result = self::getErrors( $schema->items(), $d );
					if ( $result ) {
						$errors[] = $result;
					}
				}
				return $errors;

			case AvroSchema::MAP_SCHEMA:
				if ( !is_array( $datum ) ) {
					return self::wrongType( 'array', $datum );
				}
				$errors = [];
				foreach ( $datum as $k => $v ) {
					// PHP stores integer-like string keys as integers, so
					// those are reported as non-string keys as well.
					if ( !is_string( $k ) ) {
						$errors[] = self::wrongType( 'string key', $k );
					}
					$result = self::getErrors( $schema->values(), $v );
					if ( $result ) {
						if ( array_key_exists( $k, $errors ) ) {
							// An integer key can coincide with the index of a
							// previously appended error; append instead of
							// overwriting so that no error is lost.
							$errors[] = $result;
						} else {
							$errors[$k] = $result;
						}
					}
				}
				return $errors;

			case AvroSchema::UNION_SCHEMA:
				$errors = [];
				foreach ( $schema->schemas() as $candidate ) {
					$result = self::getErrors( $candidate, $datum );
					if ( !$result ) {
						// Matching a single member schema is sufficient.
						return [];
					}
					$errors[] = $result;
				}
				if ( $errors ) {
					// @phan-suppress-next-line PhanTypeMismatchReturn
					return [ "Expected any one of these to be true", $errors ];
				}
				return "No schemas provided to union";

			case AvroSchema::ENUM_SCHEMA:
				// Guard first: a non-string can neither be a symbol nor be
				// safely interpolated into the message below.
				if ( !is_string( $datum ) ) {
					return self::wrongType( 'string', $datum );
				}
				// Always go through the accessor rather than the raw property.
				$symbols = $schema->symbols();
				if ( !in_array( $datum, $symbols, true ) ) {
					$symbols = implode( ', ', $symbols );
					return "Expected one of $symbols but recieved $datum";
				}
				return [];

			case AvroSchema::FIXED_SCHEMA:
				if ( !is_string( $datum ) ) {
					return self::wrongType( 'string', $datum );
				}
				// Byte length, not character count.
				$len = strlen( $datum );
				$size = $schema->size();
				if ( $len !== $size ) {
					return "Expected string of length $size, "
						. "but recieved one of length $len";
				}
				return [];

			case AvroSchema::RECORD_SCHEMA:
			case AvroSchema::ERROR_SCHEMA:
			case AvroSchema::REQUEST_SCHEMA:
				if ( !is_array( $datum ) ) {
					return self::wrongType( 'array', $datum );
				}
				$errors = [];
				foreach ( $schema->fields() as $field ) {
					$name = $field->name();
					// array_key_exists() so that an explicit null value
					// still counts as "present" and gets type-checked.
					if ( !array_key_exists( $name, $datum ) ) {
						$errors[$name] = 'Missing expected field';
						continue;
					}
					$result = self::getErrors( $field->type(), $datum[$name] );
					if ( $result ) {
						$errors[$name] = $result;
					}
				}
				return $errors;

			default:
				return "Unknown avro schema type: {$schema->type}";
		}
	}

	/**
	 * Check that $datum is an integer within the inclusive range [$min, $max].
	 *
	 * @param mixed $datum The value to check.
	 * @param int $min Smallest accepted value.
	 * @param int $max Largest accepted value.
	 * @return string|array An error string, or the empty array when valid.
	 */
	private static function getIntegerErrors( $datum, $min, $max ) {
		if ( !is_int( $datum ) ) {
			return self::wrongType( 'integer', $datum );
		}
		if ( $min > $datum || $datum > $max ) {
			return self::outOfRange( $min, $max, $datum );
		}
		return [];
	}

	/**
	 * Name the type of a value for use in error messages.
	 *
	 * @param mixed $datum Any value.
	 * @return string The class name for objects, otherwise the result of
	 *  gettype().
	 */
	public static function typeOf( $datum ) {
		return is_object( $datum ) ? get_class( $datum ) : gettype( $datum );
	}

	/**
	 * Build the error message for a value of an unexpected type.
	 *
	 * @param string $expected Description of the expected type.
	 * @param mixed $datum The offending value; only its type is reported.
	 * @return string
	 */
	public static function wrongType( $expected, $datum ) {
		return "Expected $expected, but recieved " . self::typeOf( $datum );
	}

	/**
	 * Build the error message for a numeric value outside its allowed range.
	 *
	 * @param int $min Smallest accepted value.
	 * @param int $max Largest accepted value.
	 * @param int $datum The offending value.
	 * @return string
	 */
	public static function outOfRange( $min, $max, $datum ) {
		return "Expected value between $min and $max, but recieved $datum";
	}
}