Merge "Warn if stateful ParserOutput transforms are used"
[lhc/web/wiklou.git] / includes / utils / AvroValidator.php
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 */
20
21 /**
22 * Generate error strings for data that doesn't match the specified
23 * Avro schema. This is very similar to AvroSchema::is_valid_datum(),
24 * but returns error messages instead of a boolean.
25 *
26 * @since 1.26
27 * @author Erik Bernhardson <ebernhardson@wikimedia.org>
28 * @copyright © 2015 Erik Bernhardson and Wikimedia Foundation.
29 */
class AvroValidator {
	/**
	 * Recursively compare $datum with $schema and describe every mismatch.
	 *
	 * The shape of the result depends on the schema type:
	 *  - primitive, enum and fixed schemas return a single message string;
	 *  - array schemas return a list holding the errors of each failing item;
	 *  - map and record schemas return errors keyed by map key / field name
	 *    (map key-type errors are appended under fresh integer keys);
	 *  - union schemas return a two element list: a heading string followed
	 *    by the list of errors produced by each member schema.
	 *
	 * @param AvroSchema $schema The rules to conform to.
	 * @param mixed $datum The value to validate against $schema.
	 * @return string|array An error message, or a (possibly nested) array
	 *  of error messages for the provided $datum. When no errors exist the
	 *  empty array is returned.
	 */
	public static function getErrors( AvroSchema $schema, $datum ) {
		switch ( $schema->type ) {
			case AvroSchema::NULL_TYPE:
				if ( !is_null( $datum ) ) {
					return self::wrongType( 'null', $datum );
				}
				return [];

			case AvroSchema::BOOLEAN_TYPE:
				if ( !is_bool( $datum ) ) {
					return self::wrongType( 'boolean', $datum );
				}
				return [];

			case AvroSchema::STRING_TYPE:
			case AvroSchema::BYTES_TYPE:
				if ( !is_string( $datum ) ) {
					return self::wrongType( 'string', $datum );
				}
				return [];

			case AvroSchema::INT_TYPE:
				if ( !is_int( $datum ) ) {
					return self::wrongType( 'integer', $datum );
				}
				if ( AvroSchema::INT_MIN_VALUE > $datum
					|| $datum > AvroSchema::INT_MAX_VALUE
				) {
					return self::outOfRange(
						AvroSchema::INT_MIN_VALUE,
						AvroSchema::INT_MAX_VALUE,
						$datum
					);
				}
				return [];

			case AvroSchema::LONG_TYPE:
				if ( !is_int( $datum ) ) {
					return self::wrongType( 'integer', $datum );
				}
				if ( AvroSchema::LONG_MIN_VALUE > $datum
					|| $datum > AvroSchema::LONG_MAX_VALUE
				) {
					return self::outOfRange(
						AvroSchema::LONG_MIN_VALUE,
						AvroSchema::LONG_MAX_VALUE,
						$datum
					);
				}
				return [];

			case AvroSchema::FLOAT_TYPE:
			case AvroSchema::DOUBLE_TYPE:
				// Integers are accepted wherever a floating point value is expected.
				if ( !is_float( $datum ) && !is_int( $datum ) ) {
					return self::wrongType( 'float or integer', $datum );
				}
				return [];

			case AvroSchema::ARRAY_SCHEMA:
				if ( !is_array( $datum ) ) {
					return self::wrongType( 'array', $datum );
				}
				$errors = [];
				foreach ( $datum as $d ) {
					$result = self::getErrors( $schema->items(), $d );
					if ( $result ) {
						$errors[] = $result;
					}
				}
				return $errors;

			case AvroSchema::MAP_SCHEMA:
				if ( !is_array( $datum ) ) {
					return self::wrongType( 'array', $datum );
				}
				$errors = [];
				// Key errors are collected separately and appended after the
				// loop. Appending them inline with `$errors[] = ...` could use
				// the same integer index as the offending key, so the value
				// error stored at `$errors[$k]` would overwrite the key error.
				$keyErrors = [];
				foreach ( $datum as $k => $v ) {
					// PHP turns integer-like string keys into integers, so
					// those are reported here as well.
					if ( !is_string( $k ) ) {
						$keyErrors[] = self::wrongType( 'string key', $k );
					}
					$result = self::getErrors( $schema->values(), $v );
					if ( $result ) {
						$errors[$k] = $result;
					}
				}
				foreach ( $keyErrors as $keyError ) {
					$errors[] = $keyError;
				}
				return $errors;

			case AvroSchema::UNION_SCHEMA:
				$errors = [];
				// A dedicated loop variable keeps the $schema parameter intact.
				foreach ( $schema->schemas() as $memberSchema ) {
					$result = self::getErrors( $memberSchema, $datum );
					if ( !$result ) {
						// Matching any single member schema is sufficient.
						return [];
					}
					$errors[] = $result;
				}
				if ( $errors ) {
					return [ "Expected any one of these to be true", $errors ];
				}
				return "No schemas provided to union";

			case AvroSchema::ENUM_SCHEMA:
				// Guard first: a non-string can never be a symbol, and
				// interpolating an array or object into the message below
				// would raise a conversion notice or error.
				if ( !is_string( $datum ) ) {
					return self::wrongType( 'string', $datum );
				}
				// Always go through the symbols() accessor; the validator
				// already relies on it and must not reach into the schema's
				// properties directly.
				$allowed = $schema->symbols();
				// Strict comparison so that loosely-equal values do not match.
				if ( !in_array( $datum, $allowed, true ) ) {
					$symbols = implode( ', ', $allowed );
					return "Expected one of $symbols but recieved $datum";
				}
				return [];

			case AvroSchema::FIXED_SCHEMA:
				if ( !is_string( $datum ) ) {
					return self::wrongType( 'string', $datum );
				}
				// Byte length is what matters for a fixed-size value.
				$len = strlen( $datum );
				if ( $len !== $schema->size() ) {
					return "Expected string of length {$schema->size()}, "
						. "but recieved one of length $len";
				}
				return [];

			case AvroSchema::RECORD_SCHEMA:
			case AvroSchema::ERROR_SCHEMA:
			case AvroSchema::REQUEST_SCHEMA:
				if ( !is_array( $datum ) ) {
					return self::wrongType( 'array', $datum );
				}
				$errors = [];
				foreach ( $schema->fields() as $field ) {
					$name = $field->name();
					// array_key_exists() rather than isset(): a field that is
					// present with a null value must still be validated.
					if ( !array_key_exists( $name, $datum ) ) {
						$errors[$name] = 'Missing expected field';
						continue;
					}
					$result = self::getErrors( $field->type(), $datum[$name] );
					if ( $result ) {
						$errors[$name] = $result;
					}
				}
				return $errors;

			default:
				return "Unknown avro schema type: {$schema->type}";
		}
	}

	/**
	 * Name the type of a value for use in error messages.
	 *
	 * @param mixed $datum
	 * @return string The class name for objects, otherwise the result of
	 *  gettype().
	 */
	public static function typeOf( $datum ) {
		return is_object( $datum ) ? get_class( $datum ) : gettype( $datum );
	}

	/**
	 * Build the message for a value whose type does not match the schema.
	 *
	 * @param string $expected Description of the expected type.
	 * @param mixed $datum The offending value; only its type is reported.
	 * @return string
	 */
	public static function wrongType( $expected, $datum ) {
		return "Expected $expected, but recieved " . self::typeOf( $datum );
	}

	/**
	 * Build the message for a numeric value outside the permitted range.
	 *
	 * @param int $min Lowest permitted value.
	 * @param int $max Highest permitted value.
	 * @param int $datum The offending value.
	 * @return string
	 */
	public static function outOfRange( $min, $max, $datum ) {
		return "Expected value between $min and $max, but recieved $datum";
	}
}