1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2015 Joyent, Inc.
24 */
25
26 #include <sys/types.h>
27 #include <sys/cred.h>
28 #include <sys/sysmacros.h>
29 #include <sys/conf.h>
30 #include <sys/cmn_err.h>
31 #include <sys/list.h>
32 #include <sys/ksynch.h>
33 #include <sys/kmem.h>
34 #include <sys/stream.h>
35 #include <sys/modctl.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/atomic.h>
39 #include <sys/stat.h>
40 #include <sys/modhash.h>
41 #include <sys/strsubr.h>
42 #include <sys/strsun.h>
43 #include <sys/dlpi.h>
44 #include <sys/mac.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_client.h>
47 #include <sys/mac_client_priv.h>
48 #include <sys/mac_ether.h>
49 #include <sys/dls.h>
50 #include <sys/pattr.h>
51 #include <sys/time.h>
52 #include <sys/vlan.h>
53 #include <sys/vnic.h>
54 #include <sys/vnic_impl.h>
55 #include <sys/mac_flow_impl.h>
56 #include <inet/ip_impl.h>
57
/*
 * Note that for best performance, the VNIC is a passthrough design.
 * Each VNIC corresponds to a MAC client of the underlying MAC (lower MAC).
 * This MAC client is opened by the VNIC driver at VNIC creation,
 * and closed when the VNIC is deleted.
 * When a MAC client of the VNIC itself opens a VNIC, the MAC layer
 * (upper MAC) detects that the MAC being opened is a VNIC. Instead
 * of allocating a new MAC client, it asks the VNIC driver to return
 * the lower MAC client handle associated with the VNIC, and that handle
 * is returned to the upper MAC client directly. This gives upper MAC
 * clients of the VNIC direct access to the lower MAC client for both
 * the control path and the data path.
 *
 * Due to this passthrough, some of the entry points exported by the
 * VNIC driver are never directly invoked. These entry points include
 * vnic_m_start, vnic_m_stop, vnic_m_promisc, vnic_m_multicst, etc.
 *
 * VNICs support multiple upper MAC clients to enable support for
 * multiple MAC addresses on the VNIC. When the VNIC is created, the
 * initial MAC client is the primary upper MAC. Any additional MAC
 * clients are secondary MACs.
 */
80
/*
 * Forward declarations: the MAC entry points referenced by
 * vnic_m_callbacks, plus the file-local helpers that are used before
 * their definitions appear.
 */
static int vnic_m_start(void *);
static void vnic_m_stop(void *);
static int vnic_m_promisc(void *, boolean_t);
static int vnic_m_multicst(void *, boolean_t, const uint8_t *);
static int vnic_m_unicst(void *, const uint8_t *);
static int vnic_m_stat(void *, uint_t, uint64_t *);
static void vnic_m_ioctl(void *, queue_t *, mblk_t *);
static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
    const void *);
static int vnic_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *);
static void vnic_m_propinfo(void *, const char *, mac_prop_id_t,
    mac_prop_info_handle_t);
static mblk_t *vnic_m_tx(void *, mblk_t *);
static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *);
/* Notification callback registered on the lower MAC in vnic_dev_create(). */
static void vnic_notify_cb(void *, mac_notify_type_t);
static void vnic_cleanup_secondary_macs(vnic_t *, int);
97
98 static kmem_cache_t *vnic_cache;
99 static krwlock_t vnic_lock;
100 static uint_t vnic_count;
101
102 #define ANCHOR_VNIC_MIN_MTU 576
103 #define ANCHOR_VNIC_MAX_MTU 9000
104
105 /* hash of VNICs (vnic_t's), keyed by VNIC id */
106 static mod_hash_t *vnic_hash;
107 #define VNIC_HASHSZ 64
108 #define VNIC_HASH_KEY(vnic_id) ((mod_hash_key_t)(uintptr_t)vnic_id)
109
110 #define VNIC_M_CALLBACK_FLAGS \
111 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO)
112
/*
 * Callback vector handed to the MAC layer through the m_callbacks field
 * of the mac_register_t filled in by vnic_dev_create(). The initializer
 * is positional, so the order of the entries matters. The first entry is
 * the flag word (VNIC_M_CALLBACK_FLAGS); the NULL entries are presumably
 * slots for entry points this driver does not supply.
 */
static mac_callbacks_t vnic_m_callbacks = {
	VNIC_M_CALLBACK_FLAGS,
	vnic_m_stat,
	vnic_m_start,
	vnic_m_stop,
	vnic_m_promisc,
	vnic_m_multicst,
	vnic_m_unicst,
	vnic_m_tx,
	NULL,
	vnic_m_ioctl,
	vnic_m_capab_get,
	NULL,
	NULL,
	vnic_m_setprop,
	vnic_m_getprop,
	vnic_m_propinfo
};
131
132 void
133 vnic_dev_init(void)
134 {
135 vnic_cache = kmem_cache_create("vnic_cache",
136 sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
137
138 vnic_hash = mod_hash_create_idhash("vnic_hash",
139 VNIC_HASHSZ, mod_hash_null_valdtor);
140
141 rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL);
142
143 vnic_count = 0;
144 }
145
146 void
147 vnic_dev_fini(void)
148 {
149 ASSERT(vnic_count == 0);
150
151 rw_destroy(&vnic_lock);
152 mod_hash_destroy_idhash(vnic_hash);
153 kmem_cache_destroy(vnic_cache);
154 }
155
156 uint_t
157 vnic_dev_count(void)
158 {
159 return (vnic_count);
160 }
161
162 static vnic_ioc_diag_t
163 vnic_mac2vnic_diag(mac_diag_t diag)
164 {
165 switch (diag) {
166 case MAC_DIAG_MACADDR_NIC:
167 return (VNIC_IOC_DIAG_MACADDR_NIC);
168 case MAC_DIAG_MACADDR_INUSE:
169 return (VNIC_IOC_DIAG_MACADDR_INUSE);
170 case MAC_DIAG_MACADDR_INVALID:
171 return (VNIC_IOC_DIAG_MACADDR_INVALID);
172 case MAC_DIAG_MACADDRLEN_INVALID:
173 return (VNIC_IOC_DIAG_MACADDRLEN_INVALID);
174 case MAC_DIAG_MACFACTORYSLOTINVALID:
175 return (VNIC_IOC_DIAG_MACFACTORYSLOTINVALID);
176 case MAC_DIAG_MACFACTORYSLOTUSED:
177 return (VNIC_IOC_DIAG_MACFACTORYSLOTUSED);
178 case MAC_DIAG_MACFACTORYSLOTALLUSED:
179 return (VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED);
180 case MAC_DIAG_MACFACTORYNOTSUP:
181 return (VNIC_IOC_DIAG_MACFACTORYNOTSUP);
182 case MAC_DIAG_MACPREFIX_INVALID:
183 return (VNIC_IOC_DIAG_MACPREFIX_INVALID);
184 case MAC_DIAG_MACPREFIXLEN_INVALID:
185 return (VNIC_IOC_DIAG_MACPREFIXLEN_INVALID);
186 case MAC_DIAG_MACNO_HWRINGS:
187 return (VNIC_IOC_DIAG_NO_HWRINGS);
188 default:
189 return (VNIC_IOC_DIAG_NONE);
190 }
191 }
192
193 static int
194 vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type,
195 int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg,
196 uint8_t *mac_addr_arg, uint16_t flags, vnic_ioc_diag_t *diag,
197 uint16_t vid, boolean_t req_hwgrp_flag)
198 {
199 mac_diag_t mac_diag;
200 uint16_t mac_flags = 0;
201 int err;
202 uint_t addr_len;
203
204 if (flags & VNIC_IOC_CREATE_NODUPCHECK)
205 mac_flags |= MAC_UNICAST_NODUPCHECK;
206
207 switch (vnic_addr_type) {
208 case VNIC_MAC_ADDR_TYPE_FIXED:
209 case VNIC_MAC_ADDR_TYPE_VRID:
210 /*
211 * The MAC address value to assign to the VNIC
212 * is already provided in mac_addr_arg. addr_len_ptr_arg
213 * already contains the MAC address length.
214 */
215 break;
216
217 case VNIC_MAC_ADDR_TYPE_RANDOM:
218 /*
219 * Random MAC address. There are two sub-cases:
220 *
221 * 1 - If mac_len == 0, a new MAC address is generated.
222 * The length of the MAC address to generated depends
223 * on the type of MAC used. The prefix to use for the MAC
224 * address is stored in the most significant bytes
225 * of the mac_addr argument, and its length is specified
226 * by the mac_prefix_len argument. This prefix can
227 * correspond to a IEEE OUI in the case of Ethernet,
228 * for example.
229 *
230 * 2 - If mac_len > 0, the address was already picked
231 * randomly, and is now passed back during VNIC
232 * re-creation. The mac_addr argument contains the MAC
233 * address that was generated. We distinguish this
234 * case from the fixed MAC address case, since we
235 * want the user consumers to know, when they query
236 * the list of VNICs, that a VNIC was assigned a
237 * random MAC address vs assigned a fixed address
238 * specified by the user.
239 */
240
241 /*
242 * If it's a pre-generated address, we're done. mac_addr_arg
243 * and addr_len_ptr_arg already contain the MAC address
244 * value and length.
245 */
246 if (*addr_len_ptr_arg > 0)
247 break;
248
249 /* generate a new random MAC address */
250 if ((err = mac_addr_random(vnic->vn_mch,
251 prefix_len, mac_addr_arg, &mac_diag)) != 0) {
252 *diag = vnic_mac2vnic_diag(mac_diag);
253 return (err);
254 }
255 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
256 break;
257
258 case VNIC_MAC_ADDR_TYPE_FACTORY:
259 err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
260 if (err != 0) {
261 if (err == EINVAL)
262 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTINVALID;
263 if (err == EBUSY)
264 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTUSED;
265 if (err == ENOSPC)
266 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED;
267 return (err);
268 }
269
270 mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
271 mac_addr_arg, &addr_len, NULL, NULL);
272 *addr_len_ptr_arg = addr_len;
273 break;
274
275 case VNIC_MAC_ADDR_TYPE_AUTO:
276 /* first try to allocate a factory MAC address */
277 err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
278 if (err == 0) {
279 mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
280 mac_addr_arg, &addr_len, NULL, NULL);
281 vnic_addr_type = VNIC_MAC_ADDR_TYPE_FACTORY;
282 *addr_len_ptr_arg = addr_len;
283 break;
284 }
285
286 /*
287 * Allocating a factory MAC address failed, generate a
288 * random MAC address instead.
289 */
290 if ((err = mac_addr_random(vnic->vn_mch,
291 prefix_len, mac_addr_arg, &mac_diag)) != 0) {
292 *diag = vnic_mac2vnic_diag(mac_diag);
293 return (err);
294 }
295 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
296 vnic_addr_type = VNIC_MAC_ADDR_TYPE_RANDOM;
297 break;
298 case VNIC_MAC_ADDR_TYPE_PRIMARY:
299 /*
300 * We get the address here since we copy it in the
301 * vnic's vn_addr.
302 * We can't ask for hardware resources since we
303 * don't currently support hardware classification
304 * for these MAC clients.
305 */
306 if (req_hwgrp_flag) {
307 *diag = VNIC_IOC_DIAG_NO_HWRINGS;
308 return (ENOTSUP);
309 }
310 mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg);
311 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
312 mac_flags |= MAC_UNICAST_VNIC_PRIMARY;
313 break;
314 }
315
316 vnic->vn_addr_type = vnic_addr_type;
317
318 err = mac_unicast_add(vnic->vn_mch, mac_addr_arg, mac_flags,
319 &vnic->vn_muh, vid, &mac_diag);
320 if (err != 0) {
321 if (vnic_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
322 /* release factory MAC address */
323 mac_addr_factory_release(vnic->vn_mch, *addr_slot);
324 }
325 *diag = vnic_mac2vnic_diag(mac_diag);
326 }
327
328 return (err);
329 }
330
331 /*
332 * Create a new VNIC upon request from administrator.
333 * Returns 0 on success, an errno on failure.
334 */
335 /* ARGSUSED */
336 int
337 vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid,
338 vnic_mac_addr_type_t *vnic_addr_type, int *mac_len, uchar_t *mac_addr,
339 int *mac_slot, uint_t mac_prefix_len, uint16_t vid, vrid_t vrid,
340 int af, mac_resource_props_t *mrp, uint32_t flags, vnic_ioc_diag_t *diag,
341 cred_t *credp)
342 {
343 vnic_t *vnic;
344 mac_register_t *mac;
345 int err;
346 boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0);
347 char vnic_name[MAXNAMELEN];
348 const mac_info_t *minfop;
349 uint32_t req_hwgrp_flag = B_FALSE;
350
351 *diag = VNIC_IOC_DIAG_NONE;
352
353 rw_enter(&vnic_lock, RW_WRITER);
354
355 /* does a VNIC with the same id already exist? */
356 err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
357 (mod_hash_val_t *)&vnic);
358 if (err == 0) {
359 rw_exit(&vnic_lock);
360 return (EEXIST);
361 }
362
363 vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP);
364 if (vnic == NULL) {
365 rw_exit(&vnic_lock);
366 return (ENOMEM);
367 }
368
369 bzero(vnic, sizeof (*vnic));
370
371 vnic->vn_id = vnic_id;
372 vnic->vn_link_id = linkid;
373 vnic->vn_vrid = vrid;
374 vnic->vn_af = af;
375
376 if (!is_anchor) {
377 if (linkid == DATALINK_INVALID_LINKID) {
378 err = EINVAL;
379 goto bail;
380 }
381
382 /*
383 * Open the lower MAC and assign its initial bandwidth and
384 * MAC address. We do this here during VNIC creation and
385 * do not wait until the upper MAC client open so that we
386 * can validate the VNIC creation parameters (bandwidth,
387 * MAC address, etc) and reserve a factory MAC address if
388 * one was requested.
389 */
390 err = mac_open_by_linkid(linkid, &vnic->vn_lower_mh);
391 if (err != 0)
392 goto bail;
393
394 /*
395 * VNIC(vlan) over VNICs(vlans) is not supported.
396 */
397 if (mac_is_vnic(vnic->vn_lower_mh)) {
398 err = EINVAL;
399 goto bail;
400 }
401
402 /* only ethernet support for now */
403 minfop = mac_info(vnic->vn_lower_mh);
404 if (minfop->mi_nativemedia != DL_ETHER) {
405 err = ENOTSUP;
406 goto bail;
407 }
408
409 (void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL,
410 NULL);
411 err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch,
412 vnic_name, MAC_OPEN_FLAGS_IS_VNIC);
413 if (err != 0)
414 goto bail;
415
416 /* assign a MAC address to the VNIC */
417
418 err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot,
419 mac_prefix_len, mac_len, mac_addr, flags, diag, vid,
420 req_hwgrp_flag);
421 if (err != 0) {
422 vnic->vn_muh = NULL;
423 if (diag != NULL && req_hwgrp_flag)
424 *diag = VNIC_IOC_DIAG_NO_HWRINGS;
425 goto bail;
426 }
427
428 /* register to receive notification from underlying MAC */
429 vnic->vn_mnh = mac_notify_add(vnic->vn_lower_mh, vnic_notify_cb,
430 vnic);
431
432 *vnic_addr_type = vnic->vn_addr_type;
433 vnic->vn_addr_len = *mac_len;
434 vnic->vn_vid = vid;
435
436 bcopy(mac_addr, vnic->vn_addr, vnic->vn_addr_len);
437
438 if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY)
439 vnic->vn_slot_id = *mac_slot;
440
441 /*
442 * Set the initial VNIC capabilities. If the VNIC is created
443 * over MACs which does not support nactive vlan, disable
444 * VNIC's hardware checksum capability if its VID is not 0,
445 * since the underlying MAC would get the hardware checksum
446 * offset wrong in case of VLAN packets.
447 */
448 if (vid == 0 || !mac_capab_get(vnic->vn_lower_mh,
449 MAC_CAPAB_NO_NATIVEVLAN, NULL)) {
450 if (!mac_capab_get(vnic->vn_lower_mh, MAC_CAPAB_HCKSUM,
451 &vnic->vn_hcksum_txflags))
452 vnic->vn_hcksum_txflags = 0;
453 } else {
454 vnic->vn_hcksum_txflags = 0;
455 }
456 }
457
458 /* register with the MAC module */
459 if ((mac = mac_alloc(MAC_VERSION)) == NULL)
460 goto bail;
461
462 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
463 mac->m_driver = vnic;
464 mac->m_dip = vnic_get_dip();
465 mac->m_instance = (uint_t)-1;
466 mac->m_src_addr = vnic->vn_addr;
467 mac->m_callbacks = &vnic_m_callbacks;
468
469 if (!is_anchor) {
470 /*
471 * If this is a VNIC based VLAN, then we check for the
472 * margin unless it has been created with the force
473 * flag. If we are configuring a VLAN over an etherstub,
474 * we don't check the margin even if force is not set.
475 */
476 if (vid == 0 || (flags & VNIC_IOC_CREATE_FORCE) != 0) {
477 if (vid != VLAN_ID_NONE)
478 vnic->vn_force = B_TRUE;
479 /*
480 * As the current margin size of the underlying mac is
481 * used to determine the margin size of the VNIC
482 * itself, request the underlying mac not to change
483 * to a smaller margin size.
484 */
485 err = mac_margin_add(vnic->vn_lower_mh,
486 &vnic->vn_margin, B_TRUE);
487 ASSERT(err == 0);
488 } else {
489 vnic->vn_margin = VLAN_TAGSZ;
490 err = mac_margin_add(vnic->vn_lower_mh,
491 &vnic->vn_margin, B_FALSE);
492 if (err != 0) {
493 mac_free(mac);
494 if (diag != NULL)
495 *diag = VNIC_IOC_DIAG_MACMARGIN_INVALID;
496 goto bail;
497 }
498 }
499
500 mac_sdu_get(vnic->vn_lower_mh, &mac->m_min_sdu,
501 &mac->m_max_sdu);
502 err = mac_mtu_add(vnic->vn_lower_mh, &mac->m_max_sdu, B_FALSE);
503 if (err != 0) {
504 VERIFY(mac_margin_remove(vnic->vn_lower_mh,
505 vnic->vn_margin) == 0);
506 mac_free(mac);
507 if (diag != NULL)
508 *diag = VNIC_IOC_DIAG_MACMTU_INVALID;
509 goto bail;
510 }
511 vnic->vn_mtu = mac->m_max_sdu;
512 } else {
513 vnic->vn_margin = VLAN_TAGSZ;
514 mac->m_min_sdu = 1;
515 mac->m_max_sdu = ANCHOR_VNIC_MAX_MTU;
516 vnic->vn_mtu = ANCHOR_VNIC_MAX_MTU;
517 }
518
519 mac->m_margin = vnic->vn_margin;
520
521 err = mac_register(mac, &vnic->vn_mh);
522 mac_free(mac);
523 if (err != 0) {
524 if (!is_anchor) {
525 VERIFY(mac_mtu_remove(vnic->vn_lower_mh,
526 vnic->vn_mtu) == 0);
527 VERIFY(mac_margin_remove(vnic->vn_lower_mh,
528 vnic->vn_margin) == 0);
529 }
530 goto bail;
531 }
532
533 /* Set the VNIC's MAC in the client */
534 if (!is_anchor) {
535 mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh, mrp);
536
537 if (mrp != NULL) {
538 if ((mrp->mrp_mask & MRP_RX_RINGS) != 0 ||
539 (mrp->mrp_mask & MRP_TX_RINGS) != 0) {
540 req_hwgrp_flag = B_TRUE;
541 }
542 err = mac_client_set_resources(vnic->vn_mch, mrp);
543 if (err != 0) {
544 VERIFY(mac_mtu_remove(vnic->vn_lower_mh,
545 vnic->vn_mtu) == 0);
546 VERIFY(mac_margin_remove(vnic->vn_lower_mh,
547 vnic->vn_margin) == 0);
548 (void) mac_unregister(vnic->vn_mh);
549 goto bail;
550 }
551 }
552 }
553
554 err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp));
555 if (err != 0) {
556 VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
557 vnic->vn_margin) == 0);
558 if (!is_anchor) {
559 VERIFY(mac_mtu_remove(vnic->vn_lower_mh,
560 vnic->vn_mtu) == 0);
561 VERIFY(mac_margin_remove(vnic->vn_lower_mh,
562 vnic->vn_margin) == 0);
563 }
564 (void) mac_unregister(vnic->vn_mh);
565 goto bail;
566 }
567
568 /* add new VNIC to hash table */
569 err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id),
570 (mod_hash_val_t)vnic);
571 ASSERT(err == 0);
572 vnic_count++;
573
574 /*
575 * Now that we've enabled this VNIC, we should go through and update the
576 * link state by setting it to our parents.
577 */
578 vnic->vn_enabled = B_TRUE;
579
580 if (is_anchor) {
581 mac_link_update(vnic->vn_mh, LINK_STATE_UP);
582 } else {
583 mac_link_update(vnic->vn_mh,
584 mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE));
585 }
586
587 rw_exit(&vnic_lock);
588
589 return (0);
590
591 bail:
592 rw_exit(&vnic_lock);
593 if (!is_anchor) {
594 if (vnic->vn_mnh != NULL)
595 (void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
596 if (vnic->vn_muh != NULL)
597 (void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
598 if (vnic->vn_mch != NULL)
599 mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
600 if (vnic->vn_lower_mh != NULL)
601 mac_close(vnic->vn_lower_mh);
602 }
603
604 kmem_cache_free(vnic_cache, vnic);
605 return (err);
606 }
607
608 /*
609 * Modify the properties of an existing VNIC.
610 */
611 /* ARGSUSED */
612 int
613 vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask,
614 vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr,
615 uint_t mac_slot, mac_resource_props_t *mrp)
616 {
617 vnic_t *vnic = NULL;
618
619 rw_enter(&vnic_lock, RW_WRITER);
620
621 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
622 (mod_hash_val_t *)&vnic) != 0) {
623 rw_exit(&vnic_lock);
624 return (ENOENT);
625 }
626
627 rw_exit(&vnic_lock);
628
629 return (0);
630 }
631
632 /* ARGSUSED */
633 int
634 vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags, cred_t *credp)
635 {
636 vnic_t *vnic = NULL;
637 mod_hash_val_t val;
638 datalink_id_t tmpid;
639 int rc;
640
641 rw_enter(&vnic_lock, RW_WRITER);
642
643 if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
644 (mod_hash_val_t *)&vnic) != 0) {
645 rw_exit(&vnic_lock);
646 return (ENOENT);
647 }
648
649 if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid, B_TRUE)) != 0) {
650 rw_exit(&vnic_lock);
651 return (rc);
652 }
653
654 ASSERT(vnic_id == tmpid);
655
656 /*
657 * We cannot unregister the MAC yet. Unregistering would
658 * free up mac_impl_t which should not happen at this time.
659 * So disable mac_impl_t by calling mac_disable(). This will prevent
660 * any new claims on mac_impl_t.
661 */
662 if ((rc = mac_disable(vnic->vn_mh)) != 0) {
663 (void) dls_devnet_create(vnic->vn_mh, vnic_id,
664 crgetzoneid(credp));
665 rw_exit(&vnic_lock);
666 return (rc);
667 }
668
669 vnic_cleanup_secondary_macs(vnic, vnic->vn_nhandles);
670
671 vnic->vn_enabled = B_FALSE;
672 (void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val);
673 ASSERT(vnic == (vnic_t *)val);
674 vnic_count--;
675 rw_exit(&vnic_lock);
676
677 /*
678 * XXX-nicolas shouldn't have a void cast here, if it's
679 * expected that the function will never fail, then we should
680 * have an ASSERT().
681 */
682 (void) mac_unregister(vnic->vn_mh);
683
684 if (vnic->vn_lower_mh != NULL) {
685 /*
686 * Check if MAC address for the vnic was obtained from the
687 * factory MAC addresses. If yes, release it.
688 */
689 if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
690 (void) mac_addr_factory_release(vnic->vn_mch,
691 vnic->vn_slot_id);
692 }
693 (void) mac_margin_remove(vnic->vn_lower_mh, vnic->vn_margin);
694 (void) mac_mtu_remove(vnic->vn_lower_mh, vnic->vn_mtu);
695 (void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
696 (void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
697 mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
698 mac_close(vnic->vn_lower_mh);
699 }
700
701 kmem_cache_free(vnic_cache, vnic);
702 return (0);
703 }
704
705 /* ARGSUSED */
706 mblk_t *
707 vnic_m_tx(void *arg, mblk_t *mp_chain)
708 {
709 /*
710 * This function could be invoked for an anchor VNIC when sending
711 * broadcast and multicast packets, and unicast packets which did
712 * not match any local known destination.
713 */
714 freemsgchain(mp_chain);
715 return (NULL);
716 }
717
718 /*ARGSUSED*/
719 static void
720 vnic_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
721 {
722 miocnak(q, mp, 0, ENOTSUP);
723 }
724
725 /*
726 * This entry point cannot be passed-through, since it is invoked
727 * for the per-VNIC kstats which must be exported independently
728 * of the existence of VNIC MAC clients.
729 */
730 static int
731 vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
732 {
733 vnic_t *vnic = arg;
734 int rval = 0;
735
736 if (vnic->vn_lower_mh == NULL) {
737 /*
738 * It's an anchor VNIC, which does not have any
739 * statistics in itself.
740 */
741 return (ENOTSUP);
742 }
743
744 /*
745 * ENOTSUP must be reported for unsupported stats, the VNIC
746 * driver reports a subset of the stats that would
747 * be returned by a real piece of hardware.
748 */
749
750 switch (stat) {
751 case MAC_STAT_LINK_STATE:
752 case MAC_STAT_LINK_UP:
753 case MAC_STAT_PROMISC:
754 case MAC_STAT_IFSPEED:
755 case MAC_STAT_MULTIRCV:
756 case MAC_STAT_MULTIXMT:
757 case MAC_STAT_BRDCSTRCV:
758 case MAC_STAT_BRDCSTXMT:
759 case MAC_STAT_OPACKETS:
760 case MAC_STAT_OBYTES:
761 case MAC_STAT_IERRORS:
762 case MAC_STAT_OERRORS:
763 case MAC_STAT_RBYTES:
764 case MAC_STAT_IPACKETS:
765 *val = mac_client_stat_get(vnic->vn_mch, stat);
766 break;
767 default:
768 rval = ENOTSUP;
769 }
770
771 return (rval);
772 }
773
774 /*
775 * Invoked by the upper MAC to retrieve the lower MAC client handle
776 * corresponding to a VNIC. A pointer to this function is obtained
777 * by the upper MAC via capability query.
778 *
779 * XXX-nicolas Note: this currently causes all VNIC MAC clients to
780 * receive the same MAC client handle for the same VNIC. This is ok
781 * as long as we have only one VNIC MAC client which sends and
782 * receives data, but we don't currently enforce this at the MAC layer.
783 */
784 static void *
785 vnic_mac_client_handle(void *vnic_arg)
786 {
787 vnic_t *vnic = vnic_arg;
788
789 return (vnic->vn_mch);
790 }
791
792 /*
793 * Invoked when updating the primary MAC so that the secondary MACs are
794 * kept in sync.
795 */
796 static void
797 vnic_mac_secondary_update(void *vnic_arg)
798 {
799 vnic_t *vn = vnic_arg;
800 int i;
801
802 for (i = 1; i <= vn->vn_nhandles; i++) {
803 mac_secondary_dup(vn->vn_mc_handles[0], vn->vn_mc_handles[i]);
804 }
805 }
806
807 /*
808 * Return information about the specified capability.
809 */
810 /* ARGSUSED */
811 static boolean_t
812 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
813 {
814 vnic_t *vnic = arg;
815
816 switch (cap) {
817 case MAC_CAPAB_HCKSUM: {
818 uint32_t *hcksum_txflags = cap_data;
819
820 *hcksum_txflags = vnic->vn_hcksum_txflags &
821 (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM |
822 HCKSUM_INET_PARTIAL);
823 break;
824 }
825 case MAC_CAPAB_VNIC: {
826 mac_capab_vnic_t *vnic_capab = cap_data;
827
828 if (vnic->vn_lower_mh == NULL) {
829 /*
830 * It's an anchor VNIC, we don't have an underlying
831 * NIC and MAC client handle.
832 */
833 return (B_FALSE);
834 }
835
836 if (vnic_capab != NULL) {
837 vnic_capab->mcv_arg = vnic;
838 vnic_capab->mcv_mac_client_handle =
839 vnic_mac_client_handle;
840 vnic_capab->mcv_mac_secondary_update =
841 vnic_mac_secondary_update;
842 }
843 break;
844 }
845 case MAC_CAPAB_ANCHOR_VNIC: {
846 /* since it's an anchor VNIC we don't have lower mac handle */
847 if (vnic->vn_lower_mh == NULL) {
848 ASSERT(vnic->vn_link_id == 0);
849 return (B_TRUE);
850 }
851 return (B_FALSE);
852 }
853 case MAC_CAPAB_NO_NATIVEVLAN:
854 return (B_FALSE);
855 case MAC_CAPAB_NO_ZCOPY:
856 return (B_TRUE);
857 case MAC_CAPAB_VRRP: {
858 mac_capab_vrrp_t *vrrp_capab = cap_data;
859
860 if (vnic->vn_vrid != 0) {
861 if (vrrp_capab != NULL)
862 vrrp_capab->mcv_af = vnic->vn_af;
863 return (B_TRUE);
864 }
865 return (B_FALSE);
866 }
867 default:
868 return (B_FALSE);
869 }
870 return (B_TRUE);
871 }
872
/*
 * Start entry point: nothing to do, always reports success. Per the
 * design notes at the top of this file it is never invoked directly.
 */
/* ARGSUSED */
static int
vnic_m_start(void *arg)
{
	return (0);
}
879
/*
 * Stop entry point: intentionally empty. Per the design notes at the
 * top of this file it is never invoked directly.
 */
/* ARGSUSED */
static void
vnic_m_stop(void *arg)
{
}
885
886 /* ARGSUSED */
887 static int
888 vnic_m_promisc(void *arg, boolean_t on)
889 {
890 return (0);
891 }
892
893 /* ARGSUSED */
894 static int
895 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
896 {
897 return (0);
898 }
899
900 static int
901 vnic_m_unicst(void *arg, const uint8_t *macaddr)
902 {
903 vnic_t *vnic = arg;
904
905 return (mac_vnic_unicast_set(vnic->vn_mch, macaddr));
906 }
907
908 static void
909 vnic_cleanup_secondary_macs(vnic_t *vn, int cnt)
910 {
911 int i;
912
913 /* Remove existing secondaries (primary is at 0) */
914 for (i = 1; i <= cnt; i++) {
915 mac_rx_clear(vn->vn_mc_handles[i]);
916
917 /* unicast handle might not have been set yet */
918 if (vn->vn_mu_handles[i] != NULL)
919 (void) mac_unicast_remove(vn->vn_mc_handles[i],
920 vn->vn_mu_handles[i]);
921
922 mac_secondary_cleanup(vn->vn_mc_handles[i]);
923
924 mac_client_close(vn->vn_mc_handles[i], MAC_CLOSE_FLAGS_IS_VNIC);
925
926 vn->vn_mu_handles[i] = NULL;
927 vn->vn_mc_handles[i] = NULL;
928 }
929
930 vn->vn_nhandles = 0;
931 }
932
933 /*
934 * Setup secondary MAC addresses on the vnic. Due to limitations in the mac
935 * code, each mac address must be associated with a mac_client (and the
936 * flow that goes along with the client) so we need to create those clients
937 * here.
938 */
939 static int
940 vnic_set_secondary_macs(vnic_t *vn, mac_secondary_addr_t *msa)
941 {
942 int i, err;
943 char primary_name[MAXNAMELEN];
944
945 /* First, remove pre-existing secondaries */
946 ASSERT(vn->vn_nhandles < MPT_MAXMACADDR);
947 vnic_cleanup_secondary_macs(vn, vn->vn_nhandles);
948
949 if (msa->ms_addrcnt == (uint32_t)-1)
950 msa->ms_addrcnt = 0;
951
952 vn->vn_nhandles = msa->ms_addrcnt;
953
954 (void) dls_mgmt_get_linkinfo(vn->vn_id, primary_name, NULL, NULL, NULL);
955
956 /*
957 * Now add the new secondary MACs
958 * Recall that the primary MAC address is the first element.
959 * The secondary clients are named after the primary with their
960 * index to distinguish them.
961 */
962 for (i = 1; i <= vn->vn_nhandles; i++) {
963 uint8_t *addr;
964 mac_diag_t mac_diag;
965 char secondary_name[MAXNAMELEN];
966
967 (void) snprintf(secondary_name, sizeof (secondary_name),
968 "%s%02d", primary_name, i);
969
970 err = mac_client_open(vn->vn_lower_mh, &vn->vn_mc_handles[i],
971 secondary_name, MAC_OPEN_FLAGS_IS_VNIC);
972 if (err != 0) {
973 /* Remove any that we successfully added */
974 vnic_cleanup_secondary_macs(vn, --i);
975 return (err);
976 }
977
978 /*
979 * Assign a MAC address to the VNIC
980 *
981 * Normally this would be done with vnic_unicast_add but since
982 * we know these are fixed adddresses, and since we need to
983 * save this in the proper array slot, we bypass that function
984 * and go direct.
985 */
986 addr = msa->ms_addrs[i - 1];
987 err = mac_unicast_add(vn->vn_mc_handles[i], addr, 0,
988 &vn->vn_mu_handles[i], vn->vn_vid, &mac_diag);
989 if (err != 0) {
990 /* Remove any that we successfully added */
991 vnic_cleanup_secondary_macs(vn, i);
992 return (err);
993 }
994
995 /*
996 * Setup the secondary the same way as the primary (i.e.
997 * receiver function/argument (e.g. i_dls_link_rx, mac_pkt_drop,
998 * etc.), the promisc list, and the resource controls).
999 */
1000 mac_secondary_dup(vn->vn_mc_handles[0], vn->vn_mc_handles[i]);
1001 }
1002
1003 return (0);
1004 }
1005
1006 static int
1007 vnic_get_secondary_macs(vnic_t *vn, uint_t pr_valsize, void *pr_val)
1008 {
1009 int i;
1010 mac_secondary_addr_t msa;
1011
1012 if (pr_valsize < sizeof (msa))
1013 return (EINVAL);
1014
1015 /* Get existing addresses (primary is at 0) */
1016 ASSERT(vn->vn_nhandles < MPT_MAXMACADDR);
1017 for (i = 1; i <= vn->vn_nhandles; i++) {
1018 ASSERT(vn->vn_mc_handles[i] != NULL);
1019 mac_unicast_secondary_get(vn->vn_mc_handles[i],
1020 msa.ms_addrs[i - 1]);
1021 }
1022 msa.ms_addrcnt = vn->vn_nhandles;
1023
1024 bcopy(&msa, pr_val, sizeof (msa));
1025 return (0);
1026 }
1027
1028 /*
1029 * Callback functions for set/get of properties
1030 */
1031 /*ARGSUSED*/
1032 static int
1033 vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
1034 uint_t pr_valsize, const void *pr_val)
1035 {
1036 int err = 0;
1037 vnic_t *vn = m_driver;
1038
1039 switch (pr_num) {
1040 case MAC_PROP_MTU: {
1041 uint32_t mtu;
1042
1043 if (pr_valsize < sizeof (mtu)) {
1044 err = EINVAL;
1045 break;
1046 }
1047 bcopy(pr_val, &mtu, sizeof (mtu));
1048
1049 if (vn->vn_link_id == DATALINK_INVALID_LINKID) {
1050 if (mtu < ANCHOR_VNIC_MIN_MTU ||
1051 mtu > ANCHOR_VNIC_MAX_MTU) {
1052 err = EINVAL;
1053 break;
1054 }
1055 } else {
1056 err = mac_mtu_add(vn->vn_lower_mh, &mtu, B_FALSE);
1057 /*
1058 * If it's not supported to set a value here, translate
1059 * that to EINVAL, so user land gets a better idea of
1060 * what went wrong. This realistically means that they
1061 * violated the output of prop info.
1062 */
1063 if (err == ENOTSUP)
1064 err = EINVAL;
1065 if (err != 0)
1066 break;
1067 VERIFY(mac_mtu_remove(vn->vn_lower_mh,
1068 vn->vn_mtu) == 0);
1069 }
1070 vn->vn_mtu = mtu;
1071 err = mac_maxsdu_update(vn->vn_mh, mtu);
1072 break;
1073 }
1074 case MAC_PROP_SECONDARY_ADDRS: {
1075 mac_secondary_addr_t msa;
1076
1077 bcopy(pr_val, &msa, sizeof (msa));
1078 err = vnic_set_secondary_macs(vn, &msa);
1079 break;
1080 }
1081 default:
1082 err = ENOTSUP;
1083 break;
1084 }
1085 return (err);
1086 }
1087
1088 /* ARGSUSED */
1089 static int
1090 vnic_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1091 uint_t pr_valsize, void *pr_val)
1092 {
1093 vnic_t *vn = arg;
1094 int ret = 0;
1095
1096 switch (pr_num) {
1097 case MAC_PROP_SECONDARY_ADDRS:
1098 ret = vnic_get_secondary_macs(vn, pr_valsize, pr_val);
1099 break;
1100 default:
1101 ret = EINVAL;
1102 break;
1103 }
1104
1105 return (ret);
1106 }
1107
1108 /* ARGSUSED */
1109 static void vnic_m_propinfo(void *m_driver, const char *pr_name,
1110 mac_prop_id_t pr_num, mac_prop_info_handle_t prh)
1111 {
1112 vnic_t *vn = m_driver;
1113
1114 switch (pr_num) {
1115 case MAC_PROP_MTU:
1116 if (vn->vn_link_id == DATALINK_INVALID_LINKID) {
1117 mac_prop_info_set_range_uint32(prh,
1118 ANCHOR_VNIC_MIN_MTU, ANCHOR_VNIC_MAX_MTU);
1119 } else {
1120 uint32_t max;
1121 mac_perim_handle_t mph;
1122 mac_propval_range_t range;
1123
1124 /*
1125 * The valid range for a VNIC's MTU is the minimum that
1126 * the device supports and the current value of the
1127 * device. A VNIC cannot increase the current MTU of the
1128 * device. Therefore we need to get the range from the
1129 * propinfo endpoint and current mtu from the
1130 * traditional property endpoint.
1131 */
1132 mac_perim_enter_by_mh(vn->vn_lower_mh, &mph);
1133 if (mac_get_prop(vn->vn_lower_mh, MAC_PROP_MTU, "mtu",
1134 &max, sizeof (uint32_t)) != 0) {
1135 mac_perim_exit(mph);
1136 return;
1137 }
1138
1139 range.mpr_count = 1;
1140 if (mac_prop_info(vn->vn_lower_mh, MAC_PROP_MTU, "mtu",
1141 NULL, 0, &range, NULL) != 0) {
1142 mac_perim_exit(mph);
1143 return;
1144 }
1145
1146 mac_prop_info_set_default_uint32(prh, max);
1147 mac_prop_info_set_range_uint32(prh,
1148 range.mpr_range_uint32[0].mpur_min, max);
1149 mac_perim_exit(mph);
1150 }
1151 break;
1152 }
1153 }
1154
1155
1156 int
1157 vnic_info(vnic_info_t *info, cred_t *credp)
1158 {
1159 vnic_t *vnic;
1160 int err;
1161
1162 /* Make sure that the VNIC link is visible from the caller's zone. */
1163 if (!dls_devnet_islinkvisible(info->vn_vnic_id, crgetzoneid(credp)))
1164 return (ENOENT);
1165
1166 rw_enter(&vnic_lock, RW_WRITER);
1167
1168 err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(info->vn_vnic_id),
1169 (mod_hash_val_t *)&vnic);
1170 if (err != 0) {
1171 rw_exit(&vnic_lock);
1172 return (ENOENT);
1173 }
1174
1175 info->vn_link_id = vnic->vn_link_id;
1176 info->vn_mac_addr_type = vnic->vn_addr_type;
1177 info->vn_mac_len = vnic->vn_addr_len;
1178 bcopy(vnic->vn_addr, info->vn_mac_addr, MAXMACADDRLEN);
1179 info->vn_mac_slot = vnic->vn_slot_id;
1180 info->vn_mac_prefix_len = 0;
1181 info->vn_vid = vnic->vn_vid;
1182 info->vn_force = vnic->vn_force;
1183 info->vn_vrid = vnic->vn_vrid;
1184 info->vn_af = vnic->vn_af;
1185
1186 bzero(&info->vn_resource_props, sizeof (mac_resource_props_t));
1187 if (vnic->vn_mch != NULL)
1188 mac_client_get_resources(vnic->vn_mch,
1189 &info->vn_resource_props);
1190
1191 rw_exit(&vnic_lock);
1192 return (0);
1193 }
1194
1195 static void
1196 vnic_notify_cb(void *arg, mac_notify_type_t type)
1197 {
1198 vnic_t *vnic = arg;
1199
1200 /*
1201 * Do not deliver notifications if the vnic is not fully initialized
1202 * or is in process of being torn down.
1203 */
1204 if (!vnic->vn_enabled)
1205 return;
1206
1207 switch (type) {
1208 case MAC_NOTE_UNICST:
1209 /*
1210 * Only the VLAN VNIC needs to be notified with primary MAC
1211 * address change.
1212 */
1213 if (vnic->vn_addr_type != VNIC_MAC_ADDR_TYPE_PRIMARY)
1214 return;
1215
1216 /* the unicast MAC address value */
1217 mac_unicast_primary_get(vnic->vn_lower_mh, vnic->vn_addr);
1218
1219 /* notify its upper layer MAC about MAC address change */
1220 mac_unicst_update(vnic->vn_mh, (const uint8_t *)vnic->vn_addr);
1221 break;
1222
1223 case MAC_NOTE_LINK:
1224 mac_link_update(vnic->vn_mh,
1225 mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE));
1226 break;
1227
1228 default:
1229 break;
1230 }
1231 }